Add rank support in metadata

This commit is contained in:
Rodrigo Arias 2021-12-10 18:20:31 +01:00
parent 6c35d632f2
commit 1b1ca7e232
7 changed files with 155 additions and 3 deletions

View File

@ -0,0 +1,44 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW CPU: nOS-V task rank+1 of the RUNNING thread >
################################################################################
window_name CPU: nOS-V task rank+1 of the RUNNING thread
window_type single
window_id 1
window_position_x 300
window_position_y 300
window_width 954
window_height 236
window_comm_lines_enabled true
window_flags_enabled true
window_noncolor_mode true
window_color_mode window_in_null_gradient_mode
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 4.000000000000
window_minimum_y 1.000000000000
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 74
window_filter_module evt_type_label 1 "Unknown"
window_synchronize 1

View File

@ -0,0 +1,44 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW Thread: nOS-V task rank+1 of the RUNNING thread >
################################################################################
window_name Thread: nOS-V task rank+1 of the RUNNING thread
window_type single
window_id 1
window_position_x 300
window_position_y 300
window_width 954
window_height 236
window_comm_lines_enabled true
window_flags_enabled true
window_noncolor_mode true
window_color_mode window_in_null_gradient_mode
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 4.000000000000
window_minimum_y 1.000000000000
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 24
window_filter_module evt_type_label 1 "Unknown"
window_synchronize 1

4
emu.h
View File

@ -148,6 +148,7 @@ enum chan {
CHAN_NOSV_TYPEID, CHAN_NOSV_TYPEID,
CHAN_NOSV_APPID, CHAN_NOSV_APPID,
CHAN_NOSV_SUBSYSTEM, CHAN_NOSV_SUBSYSTEM,
CHAN_NOSV_RANK,
CHAN_TAMPI_MODE, CHAN_TAMPI_MODE,
CHAN_OPENMP_MODE, CHAN_OPENMP_MODE,
@ -189,6 +190,7 @@ static const int chan_to_prvtype[CHAN_MAX][3] = {
{ CHAN_NOSV_TYPEID, 21, 71 }, { CHAN_NOSV_TYPEID, 21, 71 },
{ CHAN_NOSV_APPID, 22, 72 }, { CHAN_NOSV_APPID, 22, 72 },
{ CHAN_NOSV_SUBSYSTEM, 23, 73 }, { CHAN_NOSV_SUBSYSTEM, 23, 73 },
{ CHAN_NOSV_RANK, 24, 74 },
{ CHAN_TAMPI_MODE, 30, 80 }, { CHAN_TAMPI_MODE, 30, 80 },
{ CHAN_OPENMP_MODE, 40, 90 }, { CHAN_OPENMP_MODE, 40, 90 },
{ CHAN_NODES_SUBSYSTEM, 50, 100 }, { CHAN_NODES_SUBSYSTEM, 50, 100 },
@ -304,6 +306,7 @@ struct ovni_eproc {
int index; int index;
int gindex; int gindex;
int appid; int appid;
int rank;
/* The loom of the current process */ /* The loom of the current process */
struct ovni_loom *loom; struct ovni_loom *loom;
@ -389,6 +392,7 @@ struct ovni_loom {
size_t ncpus; size_t ncpus;
size_t offset_ncpus; size_t offset_ncpus;
struct ovni_cpu *cpu; struct ovni_cpu *cpu;
int rank_enabled;
int64_t clock_offset; int64_t clock_offset;

View File

@ -50,6 +50,7 @@ hook_init_nosv(struct ovni_emu *emu)
chan_th_init(th, uth, CHAN_NOSV_TASKID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock); chan_th_init(th, uth, CHAN_NOSV_TASKID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock);
chan_th_init(th, uth, CHAN_NOSV_TYPEID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock); chan_th_init(th, uth, CHAN_NOSV_TYPEID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock);
chan_th_init(th, uth, CHAN_NOSV_APPID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock); chan_th_init(th, uth, CHAN_NOSV_APPID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock);
chan_th_init(th, uth, CHAN_NOSV_RANK, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_th, clock);
/* We allow threads to emit subsystem events in cooling and /* We allow threads to emit subsystem events in cooling and
* warming states as well, as they may be allocating memory. * warming states as well, as they may be allocating memory.
@ -68,6 +69,7 @@ hook_init_nosv(struct ovni_emu *emu)
chan_cpu_init(cpu, ucpu, CHAN_NOSV_TASKID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock); chan_cpu_init(cpu, ucpu, CHAN_NOSV_TASKID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock);
chan_cpu_init(cpu, ucpu, CHAN_NOSV_TYPEID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock); chan_cpu_init(cpu, ucpu, CHAN_NOSV_TYPEID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock);
chan_cpu_init(cpu, ucpu, CHAN_NOSV_APPID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock); chan_cpu_init(cpu, ucpu, CHAN_NOSV_APPID, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock);
chan_cpu_init(cpu, ucpu, CHAN_NOSV_RANK, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock);
chan_cpu_init(cpu, ucpu, CHAN_NOSV_SUBSYSTEM, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock); chan_cpu_init(cpu, ucpu, CHAN_NOSV_SUBSYSTEM, CHAN_TRACK_TH_RUNNING, 0, 0, 1, row, prv_cpu, clock);
} }
} }
@ -264,6 +266,9 @@ pre_task_running(struct ovni_emu *emu, struct nosv_task *task)
chan_set(&th->chan[CHAN_NOSV_TYPEID], task->type_id); chan_set(&th->chan[CHAN_NOSV_TYPEID], task->type_id);
chan_set(&th->chan[CHAN_NOSV_APPID], proc->appid); chan_set(&th->chan[CHAN_NOSV_APPID], proc->appid);
if(emu->cur_loom->rank_enabled)
chan_set(&th->chan[CHAN_NOSV_RANK], proc->rank + 1);
chan_push(&th->chan[CHAN_NOSV_SUBSYSTEM], ST_NOSV_TASK_RUNNING); chan_push(&th->chan[CHAN_NOSV_SUBSYSTEM], ST_NOSV_TASK_RUNNING);
} }
@ -278,6 +283,9 @@ pre_task_not_running(struct ovni_emu *emu)
chan_set(&th->chan[CHAN_NOSV_TYPEID], 0); chan_set(&th->chan[CHAN_NOSV_TYPEID], 0);
chan_set(&th->chan[CHAN_NOSV_APPID], 0); chan_set(&th->chan[CHAN_NOSV_APPID], 0);
if(emu->cur_loom->rank_enabled)
chan_set(&th->chan[CHAN_NOSV_RANK], 0);
chan_pop(&th->chan[CHAN_NOSV_SUBSYSTEM], ST_NOSV_TASK_RUNNING); chan_pop(&th->chan[CHAN_NOSV_SUBSYSTEM], ST_NOSV_TASK_RUNNING);
} }

18
ovni.c
View File

@ -191,6 +191,24 @@ proc_set_app(int appid)
die("json_object_set_number for app_id failed\n"); die("json_object_set_number for app_id failed\n");
} }
/* Stores the MPI rank of the current process and the total number of
 * ranks in the process metadata, under the "rank" and "nranks" keys.
 *
 * The process must already be initialized (rproc.ready is set). Calls
 * die() if the process is not ready, if the rank is outside the range
 * [0, nranks), or if the metadata cannot be updated. */
void
ovni_proc_set_rank(int rank, int nranks)
{
	if(!rproc.ready)
		die("ovni_proc_set_rank: process not yet initialized\n");

	/* The emulator marks processes without a rank with a negative
	 * value, so a negative rank stored here would later be reported
	 * as missing. Reject out-of-range values at the source instead. */
	if(rank < 0 || rank >= nranks)
		die("ovni_proc_set_rank: rank must be in the range [0, nranks)\n");

	JSON_Object *meta = json_value_get_object(rproc.meta);

	if(meta == NULL)
		die("json_value_get_object failed\n");

	if(json_object_set_number(meta, "rank", rank) != 0)
		die("json_object_set_number for rank failed\n");

	if(json_object_set_number(meta, "nranks", nranks) != 0)
		die("json_object_set_number for nranks failed\n");
}
void void
ovni_proc_init(int app, const char *loom, int pid) ovni_proc_init(int app, const char *loom, int pid)
{ {

3
ovni.h
View File

@ -128,6 +128,9 @@ struct ovni_rproc {
void ovni_proc_init(int app, const char *loom, int pid); void ovni_proc_init(int app, const char *loom, int pid);
/* Sets the MPI rank of the current process and the total number of ranks */
void ovni_proc_set_rank(int rank, int nranks);
void ovni_proc_fini(void); void ovni_proc_fini(void);
void ovni_thread_init(pid_t tid); void ovni_thread_init(pid_t tid);

37
trace.c
View File

@ -97,7 +97,7 @@ load_thread(struct ovni_ethread *thread, struct ovni_eproc *proc, int index, int
} }
static void static void
load_proc_metadata(struct ovni_eproc *proc) load_proc_metadata(struct ovni_eproc *proc, int *rank_enabled)
{ {
JSON_Object *meta; JSON_Object *meta;
@ -105,7 +105,23 @@ load_proc_metadata(struct ovni_eproc *proc)
if(meta == NULL) if(meta == NULL)
die("load_proc_metadata: json_value_get_object() failed\n"); die("load_proc_metadata: json_value_get_object() failed\n");
proc->appid = (int) json_object_get_number(meta, "app_id"); JSON_Value *appid_val = json_object_get_value(meta, "app_id");
if(appid_val == NULL)
die("process %d is missing app_id in metadata\n", proc->pid);
proc->appid = (int) json_number(appid_val);
JSON_Value *rank_val = json_object_get_value(meta, "rank");
if(rank_val != NULL)
{
proc->rank = (int) json_number(rank_val);
*rank_enabled = 1;
}
else
{
proc->rank = -1;
}
} }
@ -141,7 +157,7 @@ load_proc(struct ovni_eproc *proc, struct ovni_loom *loom, int index, int pid, c
} }
/* The appid is populated from the metadata */ /* The appid is populated from the metadata */
load_proc_metadata(proc); load_proc_metadata(proc, &loom->rank_enabled);
if((dir = opendir(procdir)) == NULL) if((dir = opendir(procdir)) == NULL)
{ {
@ -211,6 +227,7 @@ load_loom(struct ovni_loom *loom, char *loomdir)
return -1; return -1;
} }
loom->rank_enabled = 0;
loom->nprocs = count_dir_prefix(dir, "proc"); loom->nprocs = count_dir_prefix(dir, "proc");
if(loom->nprocs <= 0) if(loom->nprocs <= 0)
@ -259,6 +276,20 @@ load_loom(struct ovni_loom *loom, char *loomdir)
closedir(dir); closedir(dir);
/* Ensure all processes have a rank if any of them has one */
if(loom->rank_enabled)
{
for(i = 0; i < loom->nprocs; i++)
{
struct ovni_eproc *proc = &loom->proc[i];
if(proc->rank < 0)
{
die("process %d is missing the rank\n",
proc->pid);
}
}
}
return 0; return 0;
} }