From bf2b3b73a0daf81a63871ae3e67086eaa07f58bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Pe=C3=B1acoba=20Veigas?= Date: Wed, 27 Sep 2023 09:31:14 +0000 Subject: [PATCH] Add OpenMP emulation model For now it only has support for subsystems Co-authored-by: Rodrigo Arias Mallo --- cfg/cpu/openmp/subsystem.cfg | 42 ++++++ cfg/thread/openmp/subsystem.cfg | 42 ++++++ doc/user/emulation/events.md | 86 +++++++++++ doc/user/emulation/openmp.md | 164 +++++++++++++++++++++ src/emu/CMakeLists.txt | 2 + src/emu/emu_prv.h | 1 + src/emu/models.c | 2 + src/emu/openmp/event.c | 111 ++++++++++++++ src/emu/openmp/openmp_priv.h | 56 +++++++ src/emu/openmp/setup.c | 251 ++++++++++++++++++++++++++++++++ 10 files changed, 757 insertions(+) create mode 100644 cfg/cpu/openmp/subsystem.cfg create mode 100644 cfg/thread/openmp/subsystem.cfg create mode 100644 doc/user/emulation/openmp.md create mode 100644 src/emu/openmp/event.c create mode 100644 src/emu/openmp/openmp_priv.h create mode 100644 src/emu/openmp/setup.c diff --git a/cfg/cpu/openmp/subsystem.cfg b/cfg/cpu/openmp/subsystem.cfg new file mode 100644 index 0000000..21803d4 --- /dev/null +++ b/cfg/cpu/openmp/subsystem.cfg @@ -0,0 +1,42 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 + + +################################################################################ +< NEW DISPLAYING WINDOW CPU: OpenMP subsystem of the RUNNING thread > +################################################################################ +window_name CPU: OpenMP subsystem of the RUNNING thread +window_type single +window_id 1 +window_position_x 0 +window_position_y 0 +window_width 600 +window_height 150 +window_comm_lines_enabled true +window_flags_enabled false +window_noncolor_mode true +window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Microseconds +window_maximum_y 1000.0 +window_minimum_y 1.0 +window_compute_y_max true 
+window_level thread +window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 +window_open true +window_drawmode draw_randnotzero +window_drawmode_rows draw_randnotzero +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 50 +window_filter_module evt_type_label 1 "CPU: OpenMP subsystem of the RUNNING thread" + diff --git a/cfg/thread/openmp/subsystem.cfg b/cfg/thread/openmp/subsystem.cfg new file mode 100644 index 0000000..849ac61 --- /dev/null +++ b/cfg/thread/openmp/subsystem.cfg @@ -0,0 +1,42 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 + + +################################################################################ +< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread > +################################################################################ +window_name Thread: OpenMP subsystem of the RUNNING thread +window_type single +window_id 1 +window_position_x 0 +window_position_y 0 +window_width 600 +window_height 150 +window_comm_lines_enabled true +window_flags_enabled false +window_noncolor_mode true +window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Microseconds +window_maximum_y 1000.0 +window_minimum_y 1.0 +window_compute_y_max true +window_level thread 
+window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 +window_open true +window_drawmode draw_randnotzero +window_drawmode_rows draw_randnotzero +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 50 +window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread" + diff --git a/doc/user/emulation/events.md b/doc/user/emulation/events.md index 45b5a13..c2521d0 100644 --- a/doc/user/emulation/events.md +++ b/doc/user/emulation/events.md @@ -429,6 +429,92 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
leaves unordered event region
+## Model openmp + +List of events for the model *openmp* with identifier **`P`** at version `1.1.0`: +
+
PA[
+
enters the attached state
+
PA]
+
leaves the attached state
+
PBj
+
enters a join barrier
+
PBJ
+
leaves a join barrier
+
PBb
+
enters a barrier
+
PBB
+
leaves a barrier
+
PBt
+
enters a tasking barrier
+
PBT
+
leaves a tasking barrier
+
PBs
+
enters a spin wait
+
PBS
+
leaves a spin wait
+
PWs
+
begins static for
+
PWS
+
ceases static for
+
PWd
+
begins dynamic for init
+
PWD
+
ceases dynamic for init
+
PWc
+
begins dynamic for chunk
+
PWC
+
ceases dynamic for chunk
+
PWi
+
begins single
+
PWI
+
ceases single
+
PTr
+
begins releasing task dependencies
+
PTR
+
ceases releasing task dependencies
+
PTw
+
begins waiting for taskwait dependencies
+
PTW
+
ceases waiting for taskwait dependencies
+
PT[
+
begins invoking a task
+
PT]
+
ceases invoking a task
+
PTi
+
begins invoking an if0 task
+
PTI
+
ceases invoking an if0 task
+
PTa
+
begins task allocation
+
PTA
+
ceases task allocation
+
PTs
+
begins scheduling a task
+
PTS
+
ceases scheduling a task
+
PTt
+
enters a taskwait
+
PTT
+
leaves a taskwait
+
PTy
+
enters a taskyield
+
PTY
+
leaves a taskyield
+
PTd
+
begins duplicating a task
+
PTD
+
ceases duplicating a task
+
PTc
+
begins checking task dependencies
+
PTC
+
ceases checking task dependencies
+
PTg
+
enters a taskgroup
+
PTG
+
leaves a taskgroup
+
+ ## Model tampi List of events for the model *tampi* with identifier **`T`** at version `1.0.0`: diff --git a/doc/user/emulation/openmp.md b/doc/user/emulation/openmp.md new file mode 100644 index 0000000..4806c3b --- /dev/null +++ b/doc/user/emulation/openmp.md @@ -0,0 +1,164 @@ +# OpenMP Model + +The LLVM OpenMP Runtime is an integral component of the LLVM compiler +infrastructure that provides support for the OpenMP (Open Multi-Processing) +programming model. + +OpenMP is a widely used API and set of directives for parallel programming, +allowing developers to write multi-threaded and multi-process applications more +easily. + +This documentation is about an OpenMP runtime built on top of [nOS-V][nosv], +leveraging its thread management capabilities while retaining the fundamental +characteristics of the original runtime. + +While the modifications introduced to the runtime may appear to be minor, it's +important to note that this enhanced version is not API compatible with the +original runtime. As a result, it is mandatory to use the clang built in the same +[LLVM Project][llvm]. + +This document describes all the instrumentation features included in the runtime +by both nOS-V and OpenMP to monitor task execution and the execution flow within +the runtime library to identify what is happening. This data is useful for both +users and developers of the OpenMP runtime to analyze issues and undesired +behaviors. + +[llvm]: https://pm.bsc.es/llvm-ompss +[nosv]: https://gitlab.bsc.es/nos-v/nos-v + +## How to Generate Execution Traces + +In order to build the OpenMP runtime nOS-V must be provided by using +`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a +runtime without instrumentation. However, the user may be able to generate +execution traces by enabling nOS-V instrumentation through +`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a +nOS-V installation built with ovni. 
+ +Building OpenMP with instrumentation requires passing the ovni pkg-config path in +`PKG_CONFIG_PATH`, along with a nOS-V installation that was also compiled with ovni. The reason is +that OpenMP depends on nOS-V to generate complete execution traces. + +By default, OpenMP will not instrument anything. To enable instrumentation the +user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. + +The following sections will describe the OpenMP execution trace views and what +information is shown there. + +## nOS-V Task Type + +As said in the previous sections, this OpenMP runtime is built on top of nOS-V, +so the user can explore what the execution does there. Here we only describe +the task type view. For other views please take a look at the nOS-V chapter. + +In OpenMP, every thread that is launched (main thread included) is shown in a task +type with label "openmp". In a task application, every task call will be seen with +a task type whose label has the "file:line:col" format, referring to the pragma location. This +can be changed by using the clause label(string-literal). + +OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for +more information. Nevertheless, the OpenMP task view shows it. + +## OpenMP Subsystem + +This view illustrates the activities of each thread with different states: + +- **Attached**: The thread is attached. + +- **Join barrier**: The thread is in the implicit barrier of the parallel region. + +- **Tasking barrier**: The thread is in the additional tasking barrier trying to + execute tasks. This event happens if executed with KMP_TASKING=1. + +- **Spin wait**: The thread spin waits for a condition. Usually this event happens + in a barrier while waiting for the other threads to reach the barrier. The thread + also tries to execute tasks. + +- **For static**: Executing a for static. The length of the event represents all the + chunks of iterations executed by the thread. 
See "Limitations" section. + +- **For dynamic init**: Running the initialization of an OpenMP for dynamic. + +- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To + clarify, if a thread executes two chunks of iterations, let's say from 1 to 4 and + from 8 to 12, two different events will be shown. See "Limitations" section. + +- **Single**: Running a Single region. All threads of the parallel region will emit + the event. + +- **Release deps**: When finishing a task, trying to release dependencies. This + event happens even if the task has no dependencies. + +- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled. + This appears typically in a task if0 with dependencies or a taskwait with deps. + +- **Invoke task**: Executing a task. + +- **Invoke task if0**: Executing a task if0. + +- **Task alloc**: Allocating the task descriptor. + +- **Task schedule**: Adding the task to the scheduler. + +- **Taskwait**: Running a taskwait. + +- **Taskyield**: Running a taskyield. + +- **Task dup alloc**: Duplicating the task descriptor in a taskloop. + +- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This + means that if all dependencies are fulfilled the task will be scheduled. + +- **Taskgroup**: Running a taskgroup. + +## Limitations + +Because of the way OpenMP is implemented, there are some instrumentation points that +violate ovni subsystem rules. This mostly happens because some directives are lowered +partially in the transformed user code, so it is not easy to wrap them into a +single-entry single-exit (SESE) region, like we would do with a regular task invocation, +for example. + +All problematic directives are described here so the user is able to understand what +is being shown in the traces. + +- **Task if0**: The lowered user code of a task if0 is: + ... 
= __kmpc_omp_task_alloc(...); + __kmpc_omp_taskwait_deps_51(...); // If task has dependencies + __kmpc_omp_task_begin_if0(...); + // Call to the user code + omp_task_entry_(...); + __kmpc_omp_task_complete_if0(...); + + Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As + this code is generated by the compiler, it is assumed that instrumenting `__kmpc_omp_task_begin_if0` + and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent. + +- **For static**: The lowered user code of a for static is: + // Parallel code + __kmpc_for_static_init_4(...); + for ( i = ...; i <= ...; ++i ) + ; + __kmpc_for_static_fini(...); + + Ideally, the for loop should be called by the runtime to ensure the SESE structure. As + this code is generated by the compiler, it is assumed that instrumenting `__kmpc_for_static_init_4` + and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent. + +- **For dynamic**: The lowered user code of a for dynamic is: + + __kmpc_dispatch_init_4(...); + while ( __kmpc_dispatch_next_4(...)) + { + for ( i = ...; i <= ...; ++i ) + ; + } + + Ideally, the for loop should be called by the runtime to ensure the SESE structure. As + this code is generated by the compiler, the subsystem view shows: + 1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init**. + 2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4` + with the event **For dynamic chunk**. + 3. How long it takes to run a loop iteration chunk between the last and the previous + `__kmpc_dispatch_next_4` call with the event **For dynamic chunk**. 
+ diff --git a/src/emu/CMakeLists.txt b/src/emu/CMakeLists.txt index a042b24..c9112ab 100644 --- a/src/emu/CMakeLists.txt +++ b/src/emu/CMakeLists.txt @@ -61,6 +61,8 @@ add_library(emu STATIC tampi/event.c kernel/setup.c kernel/event.c + openmp/setup.c + openmp/event.c ) target_link_libraries(emu ovni-static) diff --git a/src/emu/emu_prv.h b/src/emu/emu_prv.h index 4ae4c47..664ac0b 100644 --- a/src/emu/emu_prv.h +++ b/src/emu/emu_prv.h @@ -29,6 +29,7 @@ enum emu_prv_types { PRV_NANOS6_IDLE = 40, PRV_NANOS6_BREAKDOWN = 41, PRV_KERNEL_CS = 45, + PRV_OPENMP_SUBSYSTEM = 50, PRV_RESERVED = 100, }; diff --git a/src/emu/models.c b/src/emu/models.c index 26a0f0c..37e4238 100644 --- a/src/emu/models.c +++ b/src/emu/models.c @@ -15,6 +15,7 @@ extern struct model_spec model_nodes; extern struct model_spec model_tampi; extern struct model_spec model_mpi; extern struct model_spec model_kernel; +extern struct model_spec model_openmp; static struct model_spec *models[] = { &model_ovni, @@ -24,6 +25,7 @@ static struct model_spec *models[] = { &model_tampi, &model_mpi, &model_kernel, + &model_openmp, NULL }; diff --git a/src/emu/openmp/event.c b/src/emu/openmp/event.c new file mode 100644 index 0000000..1250041 --- /dev/null +++ b/src/emu/openmp/event.c @@ -0,0 +1,111 @@ +/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "openmp_priv.h" +#include "chan.h" +#include "common.h" +#include "emu.h" +#include "emu_ev.h" +#include "extend.h" +#include "model_thread.h" +#include "thread.h" +#include "value.h" + +enum { PUSH = 1, POP = 2, IGN = 3 }; /* Actions stored in fn_table; entries left unset are zero, which matches none of them */ + +static const int fn_table[256][256][3] = { /* Indexed by the event category and value; each entry holds { channel, action, state } */ + ['A'] = { + ['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED }, + [']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED }, + }, + ['B'] = { + ['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER }, + ['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER }, + ['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER }, + ['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER }, + ['t'] = { 
CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER }, + ['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER }, + ['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT }, + ['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT }, + }, + ['W'] = { + ['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC }, + ['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC }, + ['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT }, + ['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT }, + ['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK }, + ['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK }, + ['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE }, + ['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE }, + }, + ['T'] = { + ['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS }, + ['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS }, + ['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS }, + ['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS }, + ['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK }, + [']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK }, + ['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 }, + ['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 }, + ['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC }, + ['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC }, + ['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, + ['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE }, + ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT }, + ['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT }, + ['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD }, + ['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD }, + ['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC }, + ['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC }, + ['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS }, + ['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS }, + ['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP }, + ['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP }, + }, +}; + +static int +process_ev(struct emu *emu) +{ /* Pushes or pops the state that fn_table assigns to the current event on the thread channel; returns the chan_push() or chan_pop() result, 0 for IGN and -1 on error */ + if (!emu->thread->is_running) { + err("current thread %d not running", emu->thread->tid); + return -1; + } + + const int *entry = fn_table[emu->ev->c][emu->ev->v]; /* All zeros when the event is not listed in the table */ + int chind = entry[0]; + int action = entry[1]; + 
int st = entry[2]; + + struct openmp_thread *th = EXT(emu->thread, 'P'); + struct chan *ch = &th->m.ch[chind]; + + if (action == PUSH) { + return chan_push(ch, value_int64(st)); + } else if (action == POP) { + return chan_pop(ch, value_int64(st)); + } else if (action == IGN) { + return 0; /* do nothing */ + } + + err("unknown openmp function event"); /* Only reached when the action is not PUSH, POP or IGN, as for events missing from fn_table */ + return -1; +} + +int +model_openmp_event(struct emu *emu) +{ /* Entry point for openmp events: rejects events whose model is not 'P' and then runs process_ev(); returns 0 on success and -1 on error */ + dbg("in openmp_event"); + if (emu->ev->m != 'P') { + err("unexpected event model %c", emu->ev->m); + return -1; + } + + dbg("got openmp event %s", emu->ev->mcv); + if (process_ev(emu) != 0) { + err("error processing openmp event"); + return -1; + } + + return 0; +} diff --git a/src/emu/openmp/openmp_priv.h b/src/emu/openmp/openmp_priv.h new file mode 100644 index 0000000..a5ee105 --- /dev/null +++ b/src/emu/openmp/openmp_priv.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef OPENMP_PRIV_H +#define OPENMP_PRIV_H + +#include "emu.h" +#include "model_cpu.h" +#include "model_thread.h" + +/* Private enums */ + +enum openmp_chan { /* Channels of the model; CH_MAX is the number of channels */ + CH_SUBSYSTEM = 0, + CH_MAX, +}; + + +enum openmp_function_values { /* States pushed to and popped from the subsystem channel, numbered from 1 */ + ST_ATTACHED = 1, + ST_JOIN_BARRIER, + ST_BARRIER, + ST_TASKING_BARRIER, + ST_SPIN_WAIT, + ST_FOR_STATIC, + ST_FOR_DYNAMIC_INIT, + ST_FOR_DYNAMIC_CHUNK, + ST_SINGLE, + ST_RELEASE_DEPS, + ST_TASKWAIT_DEPS, + ST_INVOKE_TASK, + ST_INVOKE_TASK_IF0, + ST_TASK_ALLOC, + ST_TASK_SCHEDULE, + ST_TASKWAIT, + ST_TASKYIELD, + ST_TASK_DUP_ALLOC, + ST_CHECK_DEPS, + ST_TASKGROUP, +}; + +struct openmp_thread { /* Per-thread data of the model, fetched with EXT(thread, 'P') */ + struct model_thread m; +}; + +struct openmp_cpu { /* Per-CPU data of the model */ + struct model_cpu m; +}; + +int model_openmp_probe(struct emu *emu); +int model_openmp_create(struct emu *emu); +int model_openmp_connect(struct emu *emu); +int model_openmp_event(struct emu *emu); +int model_openmp_finish(struct emu *emu); + +#endif /* OPENMP_PRIV_H */ diff --git a/src/emu/openmp/setup.c b/src/emu/openmp/setup.c new 
file mode 100644 index 0000000..ddad033 --- /dev/null +++ b/src/emu/openmp/setup.c @@ -0,0 +1,251 @@ +/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "openmp_priv.h" +#include <stddef.h> +#include "chan.h" +#include "common.h" +#include "emu.h" +#include "emu_args.h" +#include "emu_prv.h" +#include "ev_spec.h" +#include "extend.h" +#include "model.h" +#include "model_chan.h" +#include "model_cpu.h" +#include "model_pvt.h" +#include "model_thread.h" +#include "pv/pcf.h" +#include "pv/prv.h" +#include "system.h" +#include "thread.h" +#include "track.h" +#include "value.h" + +static const char model_name[] = "openmp"; +enum { model_id = 'P' }; /* Model identifier: the first character of every openmp event */ + +static struct ev_decl model_evlist[] = { /* Events accepted by the model, declared as begin/end pairs with their description */ + PAIR_E("PA[", "PA]", "the attached state") + + PAIR_E("PBj", "PBJ", "a join barrier") + PAIR_E("PBb", "PBB", "a barrier") + PAIR_E("PBt", "PBT", "a tasking barrier") + PAIR_E("PBs", "PBS", "a spin wait") + + PAIR_B("PWs", "PWS", "static for") + PAIR_B("PWd", "PWD", "dynamic for init") + PAIR_B("PWc", "PWC", "dynamic for chunk") + PAIR_B("PWi", "PWI", "single") + + PAIR_B("PTr", "PTR", "releasing task dependencies") + PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") + PAIR_B("PT[", "PT]", "invoking a task") + PAIR_B("PTi", "PTI", "invoking an if0 task") + PAIR_B("PTa", "PTA", "task allocation") + PAIR_B("PTs", "PTS", "scheduling a task") + PAIR_E("PTt", "PTT", "a taskwait") + PAIR_E("PTy", "PTY", "a taskyield") + PAIR_B("PTd", "PTD", "duplicating a task") + PAIR_B("PTc", "PTC", "checking task dependencies") + PAIR_E("PTg", "PTG", "a taskgroup") + + { NULL, NULL }, +}; + +struct model_spec model_openmp = { /* Model descriptor referenced from the models[] list in models.c */ + .name = model_name, + .version = "1.1.0", + .evlist = model_evlist, + .model = model_id, + .create = model_openmp_create, + .connect = model_openmp_connect, + .event = model_openmp_event, + .probe = model_openmp_probe, + .finish = model_openmp_finish, +}; + +/* ----------------- channels ------------------ 
*/ + +static const char *chan_name[CH_MAX] = { + [CH_SUBSYSTEM] = "subsystem", +}; + +static const int chan_stack[CH_MAX] = { /* NOTE(review): presumably 1 selects a stack channel, as end_lint reads ch->data.stack.n; confirm in model_chan */ + [CH_SUBSYSTEM] = 1, +}; + +/* ----------------- pvt ------------------ */ + +static const int pvt_type[CH_MAX] = { + [CH_SUBSYSTEM] = PRV_OPENMP_SUBSYSTEM, +}; + +static const char *pcf_prefix[CH_MAX] = { + [CH_SUBSYSTEM] = "OpenMP subsystem", +}; + +static const struct pcf_value_label openmp_subsystem_values[] = { /* Label of each subsystem state, closed by the { -1, NULL } entry */ + { ST_ATTACHED, "Attached" }, + { ST_JOIN_BARRIER, "Join barrier" }, + { ST_BARRIER, "Barrier" }, + { ST_TASKING_BARRIER, "Tasking barrier" }, + { ST_SPIN_WAIT, "Spin wait" }, + { ST_FOR_STATIC, "For static" }, + { ST_FOR_DYNAMIC_INIT, "For dynamic init" }, + { ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" }, + { ST_SINGLE, "Single" }, + { ST_RELEASE_DEPS, "Release deps" }, + { ST_TASKWAIT_DEPS, "Taskwait deps" }, + { ST_INVOKE_TASK, "Invoke task" }, + { ST_INVOKE_TASK_IF0, "Invoke task if0" }, + { ST_TASK_ALLOC, "Task alloc" }, + { ST_TASK_SCHEDULE, "Task schedule" }, + { ST_TASKWAIT, "Taskwait" }, + { ST_TASKYIELD, "Taskyield" }, + { ST_TASK_DUP_ALLOC, "Task dup alloc" }, + { ST_CHECK_DEPS, "Check deps" }, + { ST_TASKGROUP, "Taskgroup" }, + { -1, NULL }, +}; + +static const struct pcf_value_label *pcf_labels[CH_MAX] = { + [CH_SUBSYSTEM] = openmp_subsystem_values, +}; + +static const long prv_flags[CH_MAX] = { + [CH_SUBSYSTEM] = PRV_SKIPDUP, +}; + +static const struct model_pvt_spec pvt_spec = { + .type = pvt_type, + .prefix = pcf_prefix, + .label = pcf_labels, + .flags = prv_flags, +}; + +/* ----------------- tracking ------------------ */ + +static const int th_track[CH_MAX] = { + [CH_SUBSYSTEM] = TRACK_TH_RUN, +}; + +static const int cpu_track[CH_MAX] = { + [CH_SUBSYSTEM] = TRACK_TH_RUN, +}; + +/* ----------------- chan_spec ------------------ */ + +static const struct model_chan_spec th_chan = { + .nch = CH_MAX, + .prefix = model_name, + .ch_names = chan_name, + .ch_stack = chan_stack, + .pvt = &pvt_spec, + .track = th_track, 
+}; + +static const struct model_chan_spec cpu_chan = { + .nch = CH_MAX, + .prefix = model_name, + .ch_names = chan_name, + .ch_stack = chan_stack, + .pvt = &pvt_spec, + .track = cpu_track, +}; + +/* ----------------- models ------------------ */ + +static const struct model_cpu_spec cpu_spec = { + .size = sizeof(struct openmp_cpu), + .chan = &cpu_chan, + .model = &model_openmp, +}; + +static const struct model_thread_spec th_spec = { + .size = sizeof(struct openmp_thread), + .chan = &th_chan, + .model = &model_openmp, +}; + +/* ----------------------------------------------------- */ + +int +model_openmp_probe(struct emu *emu) +{ /* Returns the result of model_version_probe() for this model */ + return model_version_probe(&model_openmp, emu); +} + +int +model_openmp_create(struct emu *emu) +{ /* Runs model_thread_create() and model_cpu_create() with the openmp specs; returns 0 on success and -1 on error */ + if (model_thread_create(emu, &th_spec) != 0) { + err("model_thread_create failed"); + return -1; + } + + if (model_cpu_create(emu, &cpu_spec) != 0) { + err("model_cpu_create failed"); + return -1; + } + + return 0; +} + +int +model_openmp_connect(struct emu *emu) +{ /* Runs model_thread_connect() and model_cpu_connect() with the openmp specs; returns 0 on success and -1 on error */ + if (model_thread_connect(emu, &th_spec) != 0) { + err("model_thread_connect failed"); + return -1; + } + + if (model_cpu_connect(emu, &cpu_spec) != 0) { + err("model_cpu_connect failed"); + return -1; + } + + return 0; +} + +static int +end_lint(struct emu *emu) +{ /* Linter check: returns -1 if any thread still has states stacked in its subsystem channel, 0 otherwise */ + /* Only run the check if we finished the complete trace */ + if (!emu->finished) + return 0; + + struct system *sys = &emu->system; + + /* Ensure we run out of function states */ + for (struct thread *t = sys->threads; t; t = t->gnext) { + struct openmp_thread *th = EXT(t, model_id); + struct chan *ch = &th->m.ch[CH_SUBSYSTEM]; + int stacked = ch->data.stack.n; + if (stacked > 0) { + struct value top; /* Read only to check that the channel is readable; the value is not reported */ + if (chan_read(ch, &top) != 0) { + err("chan_read failed for function"); + return -1; + } + + err("thread %d ended with %d stacked openmp functions", + t->tid, stacked); + return -1; + } + } + + return 0; +} + +int +model_openmp_finish(struct emu *emu) +{ /* Returns -1 only when linter mode is enabled and end_lint() fails; 0 otherwise */ + /* When running in linter mode perform additional checks */ 
+ if (emu->args.linter_mode && end_lint(emu) != 0) { + err("end_lint failed"); + return -1; + } + + return 0; +}