diff --git a/CHANGELOG.md b/CHANGELOG.md index ea4d93d..7a0eef9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support OpenMP label and task ID views. - Add support for nOS-V non-blocking scheduler server events (`VSN` and `VSn`). +- Add OpenMP simple breakdown view. ## [1.11.0] - 2024-11-08 diff --git a/cfg/cpu/openmp/breakdown.cfg b/cfg/cpu/openmp/breakdown.cfg new file mode 100644 index 0000000..dc684a6 --- /dev/null +++ b/cfg/cpu/openmp/breakdown.cfg @@ -0,0 +1,44 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 + + +################################################################################ +< NEW DISPLAYING WINDOW CPU: OpenMP Runtime/Label breakdown > +################################################################################ +window_name CPU: OpenMP Runtime/Label breakdown +window_type single +window_id 1 +window_position_x 100 +window_position_y 100 +window_width 600 +window_height 150 +window_comm_lines_enabled false +window_flags_enabled false +window_noncolor_mode true +window_custom_color_enabled true +window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126},{100.000000000000:0,100,0},{101.000000000000:162,155,60},{102.000000000000:124,0,0} 
+window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Microseconds +window_maximum_y 1000.0 +window_minimum_y 1.0 +window_compute_y_max true +window_level thread +window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 +window_open true +window_drawmode draw_randnotzero +window_drawmode_rows draw_randnotzero +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 53 +window_filter_module evt_type_label 1 "CPU: OpenMP Runtime/Label breakdown" + diff --git a/doc/user/emulation/openmp.md b/doc/user/emulation/openmp.md index 6269635..41c8038 100644 --- a/doc/user/emulation/openmp.md +++ b/doc/user/emulation/openmp.md @@ -177,6 +177,17 @@ currently running on each thread. The ID is a monotonically increasing identifier assigned on task creation. Lower IDs correspond to tasks created at an earlier point than higher IDs. +## Breakdown (simple) + +A simplified view for the breakdown is generated when the emulator is run with +the `-b` flag; the trace is stored in `openmp-breakdown.prv`. This breakdown +view selects the label when it has a value or the subsystem otherwise. The view +is sorted so that rows with the same values are grouped together. 
+ +Notice that unlike nOS-V or Nanos6, we don't yet include the information about +the runtime waiting or making progress, but some information can be inferred +from the subsystem states. + ## Limitations As the compiler generates the code that perform the calls to the libompv diff --git a/src/emu/CMakeLists.txt b/src/emu/CMakeLists.txt index 871acf6..a263428 100644 --- a/src/emu/CMakeLists.txt +++ b/src/emu/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC) +# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC) # SPDX-License-Identifier: GPL-3.0-or-later include_directories( @@ -65,6 +65,7 @@ add_library(emu STATIC kernel/event.c openmp/setup.c openmp/event.c + openmp/breakdown.c ) target_link_libraries(emu ovni-static) diff --git a/src/emu/emu_prv.h b/src/emu/emu_prv.h index eaec1eb..63174c5 100644 --- a/src/emu/emu_prv.h +++ b/src/emu/emu_prv.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC) +/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ #ifndef EMU_PRV_H @@ -35,6 +35,7 @@ enum emu_prv_types { PRV_OPENMP_SUBSYSTEM = 50, PRV_OPENMP_LABEL = 51, PRV_OPENMP_TASKID = 52, + PRV_OPENMP_BREAKDOWN = 53, PRV_OVNI_MARK = 100, /* User marks [100, 200) */ PRV_RESERVED = 200, diff --git a/src/emu/nanos6/setup.c b/src/emu/nanos6/setup.c index bd906df..3875430 100644 --- a/src/emu/nanos6/setup.c +++ b/src/emu/nanos6/setup.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC) +/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ #include "nanos6_priv.h" @@ -313,7 +313,7 @@ model_nanos6_create(struct emu *emu) extend_set(&emu->ext, model_id, e); if (model_nanos6_breakdown_create(emu) != 0) { - err("model_nanos6_breakdown_connect failed"); + err("model_nanos6_breakdown_create failed"); return -1; } diff --git a/src/emu/openmp/breakdown.c 
b/src/emu/openmp/breakdown.c
new file mode 100644
index 0000000..e758d8c
--- /dev/null
+++ b/src/emu/openmp/breakdown.c
@@ -0,0 +1,255 @@
+/* Copyright (c) 2024-2025 Barcelona Supercomputing Center (BSC)
+ * SPDX-License-Identifier: GPL-3.0-or-later */
+
+#include "breakdown.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include "bay.h"
+#include "chan.h"
+#include "common.h"
+#include "cpu.h"
+#include "emu.h"
+#include "emu_args.h"
+#include "emu_prv.h"
+#include "extend.h"
+#include "model_cpu.h"
+#include "mux.h"
+#include "openmp_priv.h"
+#include "proc.h"
+#include "pv/pcf.h"
+#include "pv/prf.h"
+#include "pv/prv.h"
+#include "pv/pvt.h"
+#include "recorder.h"
+#include "sort.h"
+#include "system.h"
+#include "task.h"
+#include "track.h"
+#include "value.h"
+
+/* Input indices of mux0 */
+enum {
+	MUX0_LABEL = 0,
+	MUX0_SUBSYSTEM = 1,
+};
+
+/* Initializes the breakdown output channel of one CPU and registers it in the
+ * bay. Returns 0 on success or -1 on error. */
+static int
+create_cpu(struct bay *bay, struct breakdown_cpu *bcpu, int64_t gindex)
+{
+	enum chan_type t = CHAN_SINGLE;
+	chan_init(&bcpu->out, t, "openmp.cpu%"PRIi64".breakdown.out", gindex);
+
+	if (bay_register(bay, &bcpu->out) != 0) {
+		err("bay_register out failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Creates the "openmp-breakdown" Paraver trace, the sort module and one output
+ * channel per physical CPU. Does nothing when the breakdown is not enabled in
+ * the emulator arguments. Returns 0 on success or -1 on error. */
+int
+model_openmp_breakdown_create(struct emu *emu)
+{
+	if (emu->args.breakdown == 0)
+		return 0;
+
+	struct openmp_emu *memu = EXT(emu, 'P');
+	struct breakdown_emu *bemu = &memu->breakdown;
+
+	/* Count phy cpus */
+	struct system *sys = &emu->system;
+	int64_t nphycpus = (int64_t) (sys->ncpus - sys->nlooms);
+	bemu->nphycpus = nphycpus;
+
+	/* Create a new Paraver trace */
+	struct recorder *rec = &emu->recorder;
+	bemu->pvt = recorder_add_pvt(rec, "openmp-breakdown", (long) nphycpus);
+	if (bemu->pvt == NULL) {
+		err("recorder_add_pvt failed");
+		return -1;
+	}
+
+	if (sort_init(&bemu->sort, &emu->bay, nphycpus, "openmp.breakdown.sort") != 0) {
+		err("sort_init failed");
+		return -1;
+	}
+
+	for (struct cpu *cpu = sys->cpus; cpu; cpu = cpu->next) {
+		if (cpu->is_virtual)
+			continue;
+
+		struct openmp_cpu *mcpu = EXT(cpu, 'P');
+		struct breakdown_cpu *bcpu = &mcpu->breakdown;
+
+		if (create_cpu(&emu->bay, bcpu, cpu->gindex) != 0) {
+			err("create_cpu failed");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Select function of mux0: picks the label input when the select value is not
+ * null and the subsystem input otherwise. Always returns 0. */
+static int
+select_mux0(struct mux *mux, struct value value, struct mux_input **input)
+{
+	if (value.type != VALUE_NULL)
+		*input = mux_get_input(mux, MUX0_LABEL); /* label */
+	else
+		*input = mux_get_input(mux, MUX0_SUBSYSTEM); /* subsystem */
+
+	return 0;
+}
+
+/* Sets up mux0 of one CPU: the label channel is given to mux_init() together
+ * with the output channel and select_mux0(), and the label and subsystem
+ * channels are set as the two inputs. Returns 0 on success or -1 on error. */
+static int
+connect_cpu(struct bay *bay, struct openmp_cpu *mcpu)
+{
+	struct breakdown_cpu *bcpu = &mcpu->breakdown;
+
+	/* Channel aliases */
+	struct chan *subsystem = &mcpu->m.track[CH_SUBSYSTEM].ch;
+	struct chan *label = &mcpu->m.track[CH_LABEL].ch;
+	struct chan *out = &bcpu->out;
+
+	if (mux_init(&bcpu->mux0, bay, label, out, select_mux0, 2) != 0) {
+		err("mux_init failed for mux0");
+		return -1;
+	}
+
+	if (mux_set_input(&bcpu->mux0, MUX0_LABEL, label) != 0) {
+		err("mux_set_input label failed");
+		return -1;
+	}
+
+	if (mux_set_input(&bcpu->mux0, MUX0_SUBSYSTEM, subsystem) != 0) {
+		err("mux_set_input subsystem failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Connects, for every physical CPU, the mux, the sort input and the PRV row
+ * fed by the matching sort output. Does nothing when the breakdown is not
+ * enabled. Returns 0 on success or -1 on error. */
+int
+model_openmp_breakdown_connect(struct emu *emu)
+{
+	if (emu->args.breakdown == 0)
+		return 0;
+
+	struct openmp_emu *memu = EXT(emu, 'P');
+	struct breakdown_emu *bemu = &memu->breakdown;
+	struct bay *bay = &emu->bay;
+	struct system *sys = &emu->system;
+
+	int64_t i = 0;
+	for (struct cpu *cpu = sys->cpus; cpu; cpu = cpu->next) {
+		if (cpu->is_virtual)
+			continue;
+
+		struct openmp_cpu *mcpu = EXT(cpu, 'P');
+		struct breakdown_cpu *bcpu = &mcpu->breakdown;
+
+		/* Connect the label and subsystem channels to the mux */
+		if (connect_cpu(bay, mcpu) != 0) {
+			err("connect_cpu failed");
+			return -1;
+		}
+
+		/* Connect out to sort */
+		if (sort_set_input(&bemu->sort, i, &bcpu->out) != 0) {
+			err("sort_set_input failed");
+			return -1;
+		}
+
+		/* Connect out to PRV */
+		struct prv *prv = pvt_get_prv(bemu->pvt);
+		long type = PRV_OPENMP_BREAKDOWN;
+		long flags = PRV_SKIPDUP;
+
+		/* We may emit zero at the start, when an input changes and all
+		 * the other sort output channels write a zero in the output,
+		 * before the last value is set in prv.c. */
+		flags |= PRV_ZERO;
+
+		struct chan *out = sort_get_output(&bemu->sort, i);
+		if (prv_register(prv, (long) i, type, bay, out, flags) != 0) {
+			err("prv_register failed");
+			return -1;
+		}
+
+		i++;
+	}
+
+	return 0;
+}
+
+/* Writes the breakdown PCF type, with the subsystem values and the task types
+ * of every process, and the row names of the trace. Does nothing when the
+ * breakdown is not enabled. Returns 0 on success or -1 on error. */
+int
+model_openmp_breakdown_finish(struct emu *emu,
+		const struct pcf_value_label **labels)
+{
+	if (emu->args.breakdown == 0)
+		return 0;
+
+	struct openmp_emu *memu = EXT(emu, 'P');
+	struct breakdown_emu *bemu = &memu->breakdown;
+	struct pcf *pcf = pvt_get_pcf(bemu->pvt);
+	long typeid = PRV_OPENMP_BREAKDOWN;
+	char label[] = "CPU: OpenMP Runtime/Label breakdown";
+	struct pcf_type *pcftype = pcf_add_type(pcf, (int) typeid, label);
+	if (pcftype == NULL) {
+		err("pcf_add_type failed");
+		return -1;
+	}
+
+	/* Emit subsystem values */
+	const struct pcf_value_label *v = NULL;
+	for (v = labels[CH_SUBSYSTEM]; v->label; v++) {
+		if (pcf_add_value(pcftype, v->value, v->label) == NULL) {
+			err("pcf_add_value ss failed");
+			return -1;
+		}
+	}
+
+	/* Emit label values */
+	struct system *sys = &emu->system;
+	for (struct proc *p = sys->procs; p; p = p->gnext) {
+		struct openmp_proc *proc = EXT(p, 'P');
+		struct task_info *info = &proc->task_info;
+		if (task_create_pcf_types(pcftype, info->types) != 0) {
+			err("task_create_pcf_types failed");
+			return -1;
+		}
+	}
+
+	/* Also populate the row labels */
+	struct prf *prf = pvt_get_prf(bemu->pvt);
+	for (int64_t row = 0; row < bemu->nphycpus; row++) {
+		char name[128];
+		if (snprintf(name, 128, "~CPU %4" PRIi64, bemu->nphycpus - row) >= 128) {
+			err("label too long");
+			return -1;
+		}
+
+		if (prf_add(prf, (long) row, name) != 0) {
+			err("prf_add failed for %s", name);
+			return -1;
+		}
+	}
+
+	return 0;
+}
diff --git a/src/emu/openmp/breakdown.h b/src/emu/openmp/breakdown.h
new file mode 100644
index 0000000..19325f7
--- /dev/null
+++ b/src/emu/openmp/breakdown.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2024-2025 Barcelona Supercomputing Center (BSC)
+ * 
SPDX-License-Identifier: GPL-3.0-or-later */
+
+#ifndef BREAKDOWN_H
+#define BREAKDOWN_H
+
+/*
+ * The breakdown model is implemented on top of the CPU label and subsystem
+ * channels. The mux selects the label when it is set (not null).
+ *
+ *                 +--------+
+ *                 |        |
+ *                 |        v
+ *                 |     +------+
+ * label     ------o-->--|      |
+ *                       | mux0 |-->- out
+ * subsystem     ----->--|      |
+ *                       +------+
+ *
+ * mux0 output = label if sel is not null, subsystem otherwise.
+ *
+ * Then the sort module takes the output of each CPU and sorts the values which
+ * are propagated to the PRV directly.
+ *
+ *                 +------+       +-----+
+ * cpu0.out --->---|      |--->---|     |
+ * ...             | sort |  ...  | PRV |
+ * cpuN.out --->---|      |--->---|     |
+ *                 +------+       +-----+
+ */
+
+#include <stdint.h>
+#include "chan.h"
+#include "mux.h"
+#include "sort.h"
+
+struct breakdown_cpu {
+	struct mux mux0; /* Selects the label or the subsystem */
+	struct chan out; /* Output of mux0, input of the sort module */
+};
+
+struct breakdown_emu {
+	int64_t nphycpus; /* Number of physical CPUs (rows of the trace) */
+	struct sort sort; /* Sorts the outputs of all the CPUs */
+	struct pvt *pvt; /* Breakdown Paraver trace */
+};
+
+#endif /* BREAKDOWN_H */
diff --git a/src/emu/openmp/openmp_priv.h b/src/emu/openmp/openmp_priv.h
index c858504..5736bd1 100644
--- a/src/emu/openmp/openmp_priv.h
+++ b/src/emu/openmp/openmp_priv.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
+/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC)
  * SPDX-License-Identifier: GPL-3.0-or-later */
 
 #ifndef OPENMP_PRIV_H
@@ -8,6 +8,7 @@
 #include "task.h"
 #include "model_cpu.h"
 #include "model_thread.h"
+#include "breakdown.h"
 
 /* Private enums */
 
@@ -63,6 +64,7 @@ struct openmp_thread {
 
 struct openmp_cpu {
 	struct model_cpu m;
+	struct breakdown_cpu breakdown;
 };
 
 struct openmp_proc {
@@ -70,10 +72,19 @@ struct openmp_proc {
 	struct task_info task_info;
 };
 
+struct openmp_emu {
+	struct breakdown_emu breakdown;
+};
+
 int model_openmp_probe(struct emu *emu);
 int model_openmp_create(struct emu *emu);
 int model_openmp_connect(struct emu *emu);
 int model_openmp_event(struct emu *emu);
 int model_openmp_finish(struct emu *emu);
 
+int model_openmp_breakdown_create(struct emu 
*emu); +int model_openmp_breakdown_connect(struct emu *emu); +int model_openmp_breakdown_finish(struct emu *emu, + const struct pcf_value_label **labels); + #endif /* OPENMP_PRIV_H */ diff --git a/src/emu/openmp/setup.c b/src/emu/openmp/setup.c index 1b74dde..f835932 100644 --- a/src/emu/openmp/setup.c +++ b/src/emu/openmp/setup.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC) +/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ #include "openmp_priv.h" @@ -278,6 +278,19 @@ model_openmp_create(struct emu *emu) } } + struct openmp_emu *e = calloc(1, sizeof(struct openmp_emu)); + if (e == NULL) { + err("calloc failed:"); + return -1; + } + + extend_set(&emu->ext, model_id, e); + + if (model_openmp_breakdown_create(emu) != 0) { + err("model_openmp_breakdown_create failed"); + return -1; + } + return 0; } @@ -294,6 +307,11 @@ model_openmp_connect(struct emu *emu) return -1; } + if (model_openmp_breakdown_connect(emu) != 0) { + err("model_openmp_breakdown_connect failed"); + return -1; + } + return 0; } @@ -379,6 +397,11 @@ model_openmp_finish(struct emu *emu) return -1; } + if (model_openmp_breakdown_finish(emu, pcf_labels) != 0) { + err("model_openmp_breakdown_finish failed"); + return -1; + } + /* When running in linter mode perform additional checks */ if (emu->args.linter_mode && end_lint(emu) != 0) { err("end_lint failed"); diff --git a/test/rt/openmp/CMakeLists.txt b/test/rt/openmp/CMakeLists.txt index 6bb90e0..bfc317e 100644 --- a/test/rt/openmp/CMakeLists.txt +++ b/test/rt/openmp/CMakeLists.txt @@ -26,7 +26,7 @@ if(NOT NOSV_FOUND) endif() function(openmp_rt_test) - ovni_test(${ARGN}) + ovni_test(${ARGN} SORT) target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv" "-no-pedantic") target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv") @@ -34,7 +34,7 @@ function(openmp_rt_test) set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY ENVIRONMENT 
"OMP_OVNI=1") set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY - ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni") + ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=3") endfunction() openmp_rt_test(active.c DRIVER active.driver.sh) @@ -55,7 +55,7 @@ openmp_rt_test(taskloop.c) openmp_rt_test(taskwait.c) openmp_rt_test(team-distribute.c) openmp_rt_test(worksharing-and-tasks.c) -openmp_rt_test(worksharing-mix.c) +openmp_rt_test(worksharing-mix.c BREAKDOWN) openmp_rt_test(worksharing-task.c) openmp_rt_test(worksharing.c) openmp_rt_test(worksharing01.c) diff --git a/test/rt/openmp/worksharing-active-th.driver.sh b/test/rt/openmp/worksharing-active-th.driver.sh index 98531e6..8593601 100644 --- a/test/rt/openmp/worksharing-active-th.driver.sh +++ b/test/rt/openmp/worksharing-active-th.driver.sh @@ -5,6 +5,8 @@ export OMP_NUM_THREADS=1 $target +ovnisort ovni + ovniemu -l ovni # Mark API adds 100 to the type diff --git a/test/rt/openmp/worksharing-mix.c b/test/rt/openmp/worksharing-mix.c index 5dcdb04..70e4650 100644 --- a/test/rt/openmp/worksharing-mix.c +++ b/test/rt/openmp/worksharing-mix.c @@ -34,7 +34,7 @@ int main(void) #pragma omp single for (int i = 0; i < 100; i++) { - #pragma omp task + #pragma omp task label("mini-task") sleep_us(10); } }