Add OpenMP emulation model
For now it only has support for subsystems Co-authored-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
This commit is contained in:
parent
454504d0cc
commit
bf2b3b73a0
42
cfg/cpu/openmp/subsystem.cfg
Normal file
42
cfg/cpu/openmp/subsystem.cfg
Normal file
@ -0,0 +1,42 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW CPU: OpenMP subsystem of the RUNNING thread >
|
||||
################################################################################
|
||||
window_name CPU: OpenMP subsystem of the RUNNING thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 0
|
||||
window_position_y 0
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 50
|
||||
window_filter_module evt_type_label 1 "CPU: OpenMP subsystem of the RUNNING thread"
|
||||
|
42
cfg/thread/openmp/subsystem.cfg
Normal file
42
cfg/thread/openmp/subsystem.cfg
Normal file
@ -0,0 +1,42 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread >
|
||||
################################################################################
|
||||
window_name Thread: OpenMP subsystem of the RUNNING thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 0
|
||||
window_position_y 0
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 50
|
||||
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread"
|
||||
|
@ -429,6 +429,92 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
|
||||
<dd>leaves unordered event region</dd>
|
||||
</dl>
|
||||
|
||||
## Model openmp
|
||||
|
||||
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
|
||||
<dl>
|
||||
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
|
||||
<dd>enters the attached state</dd>
|
||||
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
|
||||
<dd>leaves the attached state</dd>
|
||||
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
|
||||
<dd>enters a join barrier</dd>
|
||||
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
|
||||
<dd>leaves a join barrier</dd>
|
||||
<dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt>
|
||||
<dd>enters a barrier</dd>
|
||||
<dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt>
|
||||
<dd>leaves a barrier</dd>
|
||||
<dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt>
|
||||
<dd>enters a tasking barrier</dd>
|
||||
<dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt>
|
||||
<dd>leaves a tasking barrier</dd>
|
||||
<dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt>
|
||||
<dd>enters a spin wait</dd>
|
||||
<dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt>
|
||||
<dd>leaves a spin wait</dd>
|
||||
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
|
||||
<dd>begins static for</dd>
|
||||
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
|
||||
<dd>ceases static for</dd>
|
||||
<dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt>
|
||||
<dd>begins dynamic for init</dd>
|
||||
<dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt>
|
||||
<dd>ceases dynamic for init</dd>
|
||||
<dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt>
|
||||
<dd>begins dynamic for chunk</dd>
|
||||
<dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt>
|
||||
<dd>ceases dynamic for chunk</dd>
|
||||
<dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt>
|
||||
<dd>begins single</dd>
|
||||
<dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt>
|
||||
<dd>ceases single</dd>
|
||||
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
|
||||
<dd>begins releasing task dependencies</dd>
|
||||
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
|
||||
<dd>ceases releasing task dependencies</dd>
|
||||
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
|
||||
<dd>begins waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
|
||||
<dd>ceases waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
|
||||
<dd>begins invoking a task</dd>
|
||||
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
|
||||
<dd>ceases invoking a task</dd>
|
||||
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
|
||||
<dd>begins invoking an if0 task</dd>
|
||||
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
|
||||
<dd>ceases invoking an if0 task</dd>
|
||||
<dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt>
|
||||
<dd>begins task allocation</dd>
|
||||
<dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt>
|
||||
<dd>ceases task allocation</dd>
|
||||
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
|
||||
<dd>begins scheduling a task</dd>
|
||||
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
|
||||
<dd>ceases scheduling a task</dd>
|
||||
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
|
||||
<dd>enters a taskwait</dd>
|
||||
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
|
||||
<dd>leaves a taskwait</dd>
|
||||
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
|
||||
<dd>enters a taskyield</dd>
|
||||
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
|
||||
<dd>leaves a taskyield</dd>
|
||||
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
|
||||
<dd>begins duplicating a task</dd>
|
||||
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
|
||||
<dd>ceases duplicating a task</dd>
|
||||
<dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt>
|
||||
<dd>begins checking task dependencies</dd>
|
||||
<dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt>
|
||||
<dd>ceases checking task dependencies</dd>
|
||||
<dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt>
|
||||
<dd>enters a taskgroup</dd>
|
||||
<dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt>
|
||||
<dd>leaves a taskgroup</dd>
|
||||
</dl>
|
||||
|
||||
## Model tampi
|
||||
|
||||
List of events for the model *tampi* with identifier **`T`** at version `1.0.0`:
|
||||
|
164
doc/user/emulation/openmp.md
Normal file
164
doc/user/emulation/openmp.md
Normal file
@ -0,0 +1,164 @@
|
||||
# OpenMP Model
|
||||
|
||||
The LLVM OpenMP Runtime is an integral component of the LLVM compiler
|
||||
infrastructure that provides support for the OpenMP (Open Multi-Processing)
|
||||
programming model.
|
||||
|
||||
OpenMP is a widely used API and set of directives for parallel programming,
|
||||
allowing developers to write multi-threaded and multi-process applications more
|
||||
easily.
|
||||
|
||||
This documentation is about an OpenMP runtime built on top of [nOS-V][nosv],
|
||||
leveraging its thread management capabilities while retaining the fundamental
|
||||
characteristics of the original runtime.
|
||||
|
||||
While the modifications introduced to the runtime may appear to be minor, it's
|
||||
important to note that this enhanced version is not API compatible with the
|
||||
original runtime. As a result, it is mandatory to use the clang built in the same
|
||||
[LLVM Project][llvm].
|
||||
|
||||
This document describes all the instrumentation features included in the runtime
|
||||
by both nOS-V and OpenMP to monitor task execution and the execution flow within
|
||||
the runtime library to identify what is happening. This data is useful for both
|
||||
users and developers of the OpenMP runtime to analyze issues and undesired
|
||||
behaviors.
|
||||
|
||||
[llvm]: https://pm.bsc.es/llvm-ompss
|
||||
[nosv]: https://gitlab.bsc.es/nos-v/nos-v
|
||||
|
||||
## How to Generate Execution Traces
|
||||
|
||||
In order to build the OpenMP runtime, nOS-V must be provided by using the
|
||||
`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a
|
||||
runtime without instrumentation. However, the user may be able to generate
|
||||
execution traces by enabling nOS-V instrumentation through
|
||||
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a
|
||||
nOS-V installation built with ovni.
|
||||
|
||||
Building OpenMP with instrumentation requires passing the ovni pkg-config path to
|
||||
`PKG_CONFIG_PATH` with a nosv installation compiled with ovni too. The reason is
|
||||
that OpenMP depends on nOS-V to generate complete execution traces.
|
||||
|
||||
By default, OpenMP will not instrument anything. To enable instrumentation the
|
||||
user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
|
||||
|
||||
The following sections will describe the OpenMP execution trace views and what
|
||||
information is shown there.
|
||||
|
||||
## nOS-V Task Type
|
||||
|
||||
As mentioned in the previous sections, this OpenMP runtime is built on top of nOS-V.
|
||||
So the user can explore what the execution does there. Here we only describe
|
||||
the task type view. For other views please take a look at the nOS-V chapter.
|
||||
|
||||
In OpenMP, every thread that is launched (main thread included) is shown in a task
|
||||
type with label "openmp". In a task application, every task call will be seen with
|
||||
a task type with label "file:line:col" format referring to the pragma location. This
|
||||
can be changed by using the clause label(string-literal).
|
||||
|
||||
OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for
|
||||
more information. Nevertheless, the OpenMP task view shows it.
|
||||
|
||||
## OpenMP Subsystem
|
||||
|
||||
This view illustrates the activities of each thread with different states:
|
||||
|
||||
- **Attached**: The thread is attached.
|
||||
|
||||
- **Join barrier**: The thread is in the implicit barrier of the parallel region.
|
||||
|
||||
- **Tasking barrier**: The thread is in the additional tasking barrier trying to
|
||||
execute tasks. This event happens if executed with KMP_TASKING=1.
|
||||
|
||||
- **Spin wait**: The thread spin waits for a condition. Usually this event happens
|
||||
in a barrier while waiting for the other threads to reach the barrier. The thread
|
||||
also tries to execute tasks.
|
||||
|
||||
- **For static**: Executing a for static. The length of the event represents all the
|
||||
chunks of iterations executed by the thread. See "Limitations" section.
|
||||
|
||||
- **For dynamic init**: Running the initialization of an OpenMP for dynamic.
|
||||
|
||||
- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To
|
||||
clarify, if a thread executes two chunks of iterations, let's say from 1 to 4 and
|
||||
from 8 to 12, two different events will be shown. See "Limitations" section.
|
||||
|
||||
- **Single**: Running a Single region. All threads of the parallel region will emit
|
||||
the event.
|
||||
|
||||
- **Release deps**: When finishing a task, trying to release dependencies. This
|
||||
event happens although the task has no dependencies.
|
||||
|
||||
- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled.
|
||||
This appears typically in a task if0 with dependencies or a taskwait with deps.
|
||||
|
||||
- **Invoke task**: Executing a task.
|
||||
|
||||
- **Invoke task if0**: Executing a task if0.
|
||||
|
||||
- **Task alloc**: Allocating the task descriptor.
|
||||
|
||||
- **Task schedule**: Adding the task to the scheduler.
|
||||
|
||||
- **Taskwait**: Running a taskwait.
|
||||
|
||||
- **Taskyield**: Running a taskyield.
|
||||
|
||||
- **Task dup alloc**: Duplicating the task descriptor in a taskloop.
|
||||
|
||||
- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This
|
||||
means that if all dependencies are fulfilled the task will be scheduled.
|
||||
|
||||
- **Taskgroup**: Running a taskgroup.
|
||||
|
||||
## Limitations
|
||||
|
||||
Because of the way OpenMP is implemented, there are some instrumentation points that
|
||||
violate ovni subsystem rules. This mostly happens because some directives are lowered
|
||||
partially in the transformed user code, so it is not easy to wrap them into a
|
||||
Single-entry single-exit (SESE) region, like we would do with a regular task invocation,
|
||||
for example.
|
||||
|
||||
All problematic directives are described here so the user is able to understand what
|
||||
is being shown in the traces.
|
||||
|
||||
- **Task if0**: The lowered user code of a task if0 is:
|
||||
... = __kmpc_omp_task_alloc(...);
|
||||
__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
|
||||
__kmpc_omp_task_begin_if0(...);
|
||||
// Call to the user code
|
||||
omp_task_entry_(...);
|
||||
__kmpc_omp_task_complete_if0(...);
|
||||
|
||||
Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0`
|
||||
and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent.
|
||||
|
||||
- **For static**: The lowered user code of a for static is:
|
||||
// Parallel code
|
||||
__kmpc_for_static_init_4(...);
|
||||
for ( i = ...; i <= ...; ++i )
|
||||
;
|
||||
__kmpc_for_static_fini(...);
|
||||
|
||||
Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4`
|
||||
and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent.
|
||||
|
||||
- **For dynamic**: The lowered user code of a for dynamic is:
|
||||
|
||||
__kmpc_dispatch_init_4(...);
|
||||
while ( __kmpc_dispatch_next_4(...))
|
||||
{
|
||||
for ( i = ...; i <= ...; ++i )
|
||||
;
|
||||
}
|
||||
|
||||
Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler the subsystem view shows:
|
||||
1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init**
|
||||
2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`
|
||||
with the event **For dynamic chunk**.
|
||||
3. How long it takes to run a loop iteration chunk between the last and the previous
|
||||
`__kmpc_dispatch_next_4` call with the event **For dynamic chunk**.
|
||||
|
@ -61,6 +61,8 @@ add_library(emu STATIC
|
||||
tampi/event.c
|
||||
kernel/setup.c
|
||||
kernel/event.c
|
||||
openmp/setup.c
|
||||
openmp/event.c
|
||||
)
|
||||
target_link_libraries(emu ovni-static)
|
||||
|
||||
|
@ -29,6 +29,7 @@ enum emu_prv_types {
|
||||
PRV_NANOS6_IDLE = 40,
|
||||
PRV_NANOS6_BREAKDOWN = 41,
|
||||
PRV_KERNEL_CS = 45,
|
||||
PRV_OPENMP_SUBSYSTEM = 50,
|
||||
PRV_RESERVED = 100,
|
||||
};
|
||||
|
||||
|
@ -15,6 +15,7 @@ extern struct model_spec model_nodes;
|
||||
extern struct model_spec model_tampi;
|
||||
extern struct model_spec model_mpi;
|
||||
extern struct model_spec model_kernel;
|
||||
extern struct model_spec model_openmp;
|
||||
|
||||
static struct model_spec *models[] = {
|
||||
&model_ovni,
|
||||
@ -24,6 +25,7 @@ static struct model_spec *models[] = {
|
||||
&model_tampi,
|
||||
&model_mpi,
|
||||
&model_kernel,
|
||||
&model_openmp,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
111
src/emu/openmp/event.c
Normal file
111
src/emu/openmp/event.c
Normal file
@ -0,0 +1,111 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "openmp_priv.h"
|
||||
#include "chan.h"
|
||||
#include "common.h"
|
||||
#include "emu.h"
|
||||
#include "emu_ev.h"
|
||||
#include "extend.h"
|
||||
#include "model_thread.h"
|
||||
#include "thread.h"
|
||||
#include "value.h"
|
||||
|
||||
/* Actions an event can apply to its channel. The values start at 1, so a
 * zero-initialized fn_table slot matches none of them. */
enum {
	PUSH = 1,
	POP = 2,
	IGN = 3,
};
|
||||
|
||||
static const int fn_table[256][256][3] = {
|
||||
['A'] = {
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED },
|
||||
},
|
||||
['B'] = {
|
||||
['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER },
|
||||
['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER },
|
||||
['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER },
|
||||
['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT },
|
||||
},
|
||||
['W'] = {
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC },
|
||||
['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE },
|
||||
},
|
||||
['T'] = {
|
||||
['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS },
|
||||
['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS },
|
||||
['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS },
|
||||
['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS },
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 },
|
||||
['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC },
|
||||
['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT },
|
||||
['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD },
|
||||
['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD },
|
||||
['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS },
|
||||
['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP },
|
||||
['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP },
|
||||
},
|
||||
};
|
||||
|
||||
static int
|
||||
process_ev(struct emu *emu)
|
||||
{
|
||||
if (!emu->thread->is_running) {
|
||||
err("current thread %d not running", emu->thread->tid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int *entry = fn_table[emu->ev->c][emu->ev->v];
|
||||
int chind = entry[0];
|
||||
int action = entry[1];
|
||||
int st = entry[2];
|
||||
|
||||
struct openmp_thread *th = EXT(emu->thread, 'P');
|
||||
struct chan *ch = &th->m.ch[chind];
|
||||
|
||||
if (action == PUSH) {
|
||||
return chan_push(ch, value_int64(st));
|
||||
} else if (action == POP) {
|
||||
return chan_pop(ch, value_int64(st));
|
||||
} else if (action == IGN) {
|
||||
return 0; /* do nothing */
|
||||
}
|
||||
|
||||
err("unknown openmp function event");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_event(struct emu *emu)
|
||||
{
|
||||
dbg("in openmp_event");
|
||||
if (emu->ev->m != 'P') {
|
||||
err("unexpected event model %c", emu->ev->m);
|
||||
return -1;
|
||||
}
|
||||
|
||||
dbg("got openmp event %s", emu->ev->mcv);
|
||||
if (process_ev(emu) != 0) {
|
||||
err("error processing openmp event");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
56
src/emu/openmp/openmp_priv.h
Normal file
56
src/emu/openmp/openmp_priv.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef OPENMP_PRIV_H
|
||||
#define OPENMP_PRIV_H
|
||||
|
||||
#include "emu.h"
|
||||
#include "model_cpu.h"
|
||||
#include "model_thread.h"
|
||||
|
||||
/* Private enums */
|
||||
|
||||
/* Channels of the OpenMP model. CH_MAX follows the last channel and is
 * used as the length of the per-channel tables. */
enum openmp_chan {
	CH_SUBSYSTEM, /* first channel, value 0 */
	CH_MAX,
};
|
||||
|
||||
|
||||
/* States pushed on the subsystem channel, one per begin/end event pair.
 * The numbering starts at 1 and is spelled out explicitly. */
enum openmp_function_values {
	ST_ATTACHED          = 1,
	ST_JOIN_BARRIER      = 2,
	ST_BARRIER           = 3,
	ST_TASKING_BARRIER   = 4,
	ST_SPIN_WAIT         = 5,
	ST_FOR_STATIC        = 6,
	ST_FOR_DYNAMIC_INIT  = 7,
	ST_FOR_DYNAMIC_CHUNK = 8,
	ST_SINGLE            = 9,
	ST_RELEASE_DEPS      = 10,
	ST_TASKWAIT_DEPS     = 11,
	ST_INVOKE_TASK       = 12,
	ST_INVOKE_TASK_IF0   = 13,
	ST_TASK_ALLOC        = 14,
	ST_TASK_SCHEDULE     = 15,
	ST_TASKWAIT          = 16,
	ST_TASKYIELD         = 17,
	ST_TASK_DUP_ALLOC    = 18,
	ST_CHECK_DEPS        = 19,
	ST_TASKGROUP         = 20,
};
|
||||
|
||||
struct openmp_thread {
|
||||
struct model_thread m;
|
||||
};
|
||||
|
||||
struct openmp_cpu {
|
||||
struct model_cpu m;
|
||||
};
|
||||
|
||||
int model_openmp_probe(struct emu *emu);
|
||||
int model_openmp_create(struct emu *emu);
|
||||
int model_openmp_connect(struct emu *emu);
|
||||
int model_openmp_event(struct emu *emu);
|
||||
int model_openmp_finish(struct emu *emu);
|
||||
|
||||
#endif /* OPENMP_PRIV_H */
|
251
src/emu/openmp/setup.c
Normal file
251
src/emu/openmp/setup.c
Normal file
@ -0,0 +1,251 @@
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "openmp_priv.h"
|
||||
#include <stddef.h>
|
||||
#include "chan.h"
|
||||
#include "common.h"
|
||||
#include "emu.h"
|
||||
#include "emu_args.h"
|
||||
#include "emu_prv.h"
|
||||
#include "ev_spec.h"
|
||||
#include "extend.h"
|
||||
#include "model.h"
|
||||
#include "model_chan.h"
|
||||
#include "model_cpu.h"
|
||||
#include "model_pvt.h"
|
||||
#include "model_thread.h"
|
||||
#include "pv/pcf.h"
|
||||
#include "pv/prv.h"
|
||||
#include "system.h"
|
||||
#include "thread.h"
|
||||
#include "track.h"
|
||||
#include "value.h"
|
||||
|
||||
/* Single-character identifier of the model, as found in its events. */
enum { model_id = 'P' };

/* Name of the model; also used as prefix in the channel specs. */
static const char model_name[] = "openmp";
|
||||
|
||||
static struct ev_decl model_evlist[] = {
|
||||
PAIR_E("PA[", "PA]", "the attached state")
|
||||
|
||||
PAIR_E("PBj", "PBJ", "a join barrier")
|
||||
PAIR_E("PBb", "PBB", "a barrier")
|
||||
PAIR_E("PBt", "PBT", "a tasking barrier")
|
||||
PAIR_E("PBs", "PBS", "a spin wait")
|
||||
|
||||
PAIR_B("PWs", "PWS", "static for")
|
||||
PAIR_B("PWd", "PWD", "dynamic for init")
|
||||
PAIR_B("PWc", "PWC", "dynamic for chunk")
|
||||
PAIR_B("PWi", "PWI", "single")
|
||||
|
||||
PAIR_B("PTr", "PTR", "releasing task dependencies")
|
||||
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
|
||||
PAIR_B("PT[", "PT]", "invoking a task")
|
||||
PAIR_B("PTi", "PTI", "invoking an if0 task")
|
||||
PAIR_B("PTa", "PTA", "task allocation")
|
||||
PAIR_B("PTs", "PTS", "scheduling a task")
|
||||
PAIR_E("PTt", "PTT", "a taskwait")
|
||||
PAIR_E("PTy", "PTY", "a taskyield")
|
||||
PAIR_B("PTd", "PTD", "duplicating a task")
|
||||
PAIR_B("PTc", "PTC", "checking task dependencies")
|
||||
PAIR_E("PTg", "PTG", "a taskgroup")
|
||||
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
struct model_spec model_openmp = {
|
||||
.name = model_name,
|
||||
.version = "1.1.0",
|
||||
.evlist = model_evlist,
|
||||
.model = model_id,
|
||||
.create = model_openmp_create,
|
||||
.connect = model_openmp_connect,
|
||||
.event = model_openmp_event,
|
||||
.probe = model_openmp_probe,
|
||||
.finish = model_openmp_finish,
|
||||
};
|
||||
|
||||
/* ----------------- channels ------------------ */
|
||||
|
||||
static const char *chan_name[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = "subsystem",
|
||||
};
|
||||
|
||||
static const int chan_stack[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = 1,
|
||||
};
|
||||
|
||||
/* ----------------- pvt ------------------ */
|
||||
|
||||
static const int pvt_type[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = PRV_OPENMP_SUBSYSTEM,
|
||||
};
|
||||
|
||||
static const char *pcf_prefix[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = "OpenMP subsystem",
|
||||
};
|
||||
|
||||
static const struct pcf_value_label openmp_subsystem_values[] = {
|
||||
{ ST_ATTACHED, "Attached" },
|
||||
{ ST_JOIN_BARRIER, "Join barrier" },
|
||||
{ ST_BARRIER, "Barrier" },
|
||||
{ ST_TASKING_BARRIER, "Tasking barrier" },
|
||||
{ ST_SPIN_WAIT, "Spin wait" },
|
||||
{ ST_FOR_STATIC, "For static" },
|
||||
{ ST_FOR_DYNAMIC_INIT, "For dynamic init" },
|
||||
{ ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" },
|
||||
{ ST_SINGLE, "Single" },
|
||||
{ ST_RELEASE_DEPS, "Release deps" },
|
||||
{ ST_TASKWAIT_DEPS, "Taskwait deps" },
|
||||
{ ST_INVOKE_TASK, "Invoke task" },
|
||||
{ ST_INVOKE_TASK_IF0, "Invoke task if0" },
|
||||
{ ST_TASK_ALLOC, "Task alloc" },
|
||||
{ ST_TASK_SCHEDULE, "Task schedule" },
|
||||
{ ST_TASKWAIT, "Taskwait" },
|
||||
{ ST_TASKYIELD, "Taskyield" },
|
||||
{ ST_TASK_DUP_ALLOC, "Task dup alloc" },
|
||||
{ ST_CHECK_DEPS, "Check deps" },
|
||||
{ ST_TASKGROUP, "Taskgroup" },
|
||||
{ -1, NULL },
|
||||
};
|
||||
|
||||
static const struct pcf_value_label *pcf_labels[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = openmp_subsystem_values,
|
||||
};
|
||||
|
||||
static const long prv_flags[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = PRV_SKIPDUP,
|
||||
};
|
||||
|
||||
static const struct model_pvt_spec pvt_spec = {
|
||||
.type = pvt_type,
|
||||
.prefix = pcf_prefix,
|
||||
.label = pcf_labels,
|
||||
.flags = prv_flags,
|
||||
};
|
||||
|
||||
/* ----------------- tracking ------------------ */
|
||||
|
||||
static const int th_track[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = TRACK_TH_RUN,
|
||||
};
|
||||
|
||||
static const int cpu_track[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = TRACK_TH_RUN,
|
||||
};
|
||||
|
||||
/* ----------------- chan_spec ------------------ */
|
||||
|
||||
static const struct model_chan_spec th_chan = {
|
||||
.nch = CH_MAX,
|
||||
.prefix = model_name,
|
||||
.ch_names = chan_name,
|
||||
.ch_stack = chan_stack,
|
||||
.pvt = &pvt_spec,
|
||||
.track = th_track,
|
||||
};
|
||||
|
||||
static const struct model_chan_spec cpu_chan = {
|
||||
.nch = CH_MAX,
|
||||
.prefix = model_name,
|
||||
.ch_names = chan_name,
|
||||
.ch_stack = chan_stack,
|
||||
.pvt = &pvt_spec,
|
||||
.track = cpu_track,
|
||||
};
|
||||
|
||||
/* ----------------- models ------------------ */
|
||||
|
||||
static const struct model_cpu_spec cpu_spec = {
|
||||
.size = sizeof(struct openmp_cpu),
|
||||
.chan = &cpu_chan,
|
||||
.model = &model_openmp,
|
||||
};
|
||||
|
||||
static const struct model_thread_spec th_spec = {
|
||||
.size = sizeof(struct openmp_thread),
|
||||
.chan = &th_chan,
|
||||
.model = &model_openmp,
|
||||
};
|
||||
|
||||
/* ----------------------------------------------------- */
|
||||
|
||||
int
|
||||
model_openmp_probe(struct emu *emu)
|
||||
{
|
||||
return model_version_probe(&model_openmp, emu);
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_create(struct emu *emu)
|
||||
{
|
||||
if (model_thread_create(emu, &th_spec) != 0) {
|
||||
err("model_thread_init failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (model_cpu_create(emu, &cpu_spec) != 0) {
|
||||
err("model_cpu_init failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_connect(struct emu *emu)
|
||||
{
|
||||
if (model_thread_connect(emu, &th_spec) != 0) {
|
||||
err("model_thread_connect failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (model_cpu_connect(emu, &cpu_spec) != 0) {
|
||||
err("model_cpu_connect failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
end_lint(struct emu *emu)
|
||||
{
|
||||
/* Only run the check if we finished the complete trace */
|
||||
if (!emu->finished)
|
||||
return 0;
|
||||
|
||||
struct system *sys = &emu->system;
|
||||
|
||||
/* Ensure we run out of function states */
|
||||
for (struct thread *t = sys->threads; t; t = t->gnext) {
|
||||
struct openmp_thread *th = EXT(t, model_id);
|
||||
struct chan *ch = &th->m.ch[CH_SUBSYSTEM];
|
||||
int stacked = ch->data.stack.n;
|
||||
if (stacked > 0) {
|
||||
struct value top;
|
||||
if (chan_read(ch, &top) != 0) {
|
||||
err("chan_read failed for function");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err("thread %d ended with %d stacked openmp functions",
|
||||
t->tid, stacked);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_finish(struct emu *emu)
|
||||
{
|
||||
/* When running in linter mode perform additional checks */
|
||||
if (emu->args.linter_mode && end_lint(emu) != 0) {
|
||||
err("end_lint failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user