Update OpenMP emulation model

Add more tests, subsystem states and documentation.
Rodrigo Arias 2024-01-16 12:21:16 +01:00
parent bf2b3b73a0
commit 55318d9da7
36 changed files with 1116 additions and 287 deletions


@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems
  and only works with the OpenMP-V runtime, on top of nOS-V).

### Changed
- Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}.


@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_custom_color_enabled true
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true


@ -4,9 +4,9 @@ ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread >
################################################################################
window_name Thread: OpenMP subsystem of the ACTIVE thread
window_type single
window_id 1
window_position_x 0
@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_custom_color_enabled true
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
@ -38,5 +40,5 @@ window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 50
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread"


@ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:

<dl>
<dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt>
<dd>begins plain barrier</dd>
<dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt>
<dd>ceases plain barrier</dd>
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
<dd>begins join barrier</dd>
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
<dd>ceases join barrier</dd>
<dt><a id="PBf" href="#PBf"><pre>PBf</pre></a></dt>
<dd>begins fork barrier</dd>
<dt><a id="PBF" href="#PBF"><pre>PBF</pre></a></dt>
<dd>ceases fork barrier</dd>
<dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt>
<dd>begins tasking barrier</dd>
<dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt>
<dd>ceases tasking barrier</dd>
<dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt>
<dd>begins spin wait</dd>
<dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt>
<dd>ceases spin wait</dd>
<dt><a id="PIa" href="#PIa"><pre>PIa</pre></a></dt>
<dd>begins critical acquiring</dd>
<dt><a id="PIA" href="#PIA"><pre>PIA</pre></a></dt>
<dd>ceases critical acquiring</dd>
<dt><a id="PIr" href="#PIr"><pre>PIr</pre></a></dt>
<dd>begins critical releasing</dd>
<dt><a id="PIR" href="#PIR"><pre>PIR</pre></a></dt>
<dd>ceases critical releasing</dd>
<dt><a id="PI[" href="#PI["><pre>PI[</pre></a></dt>
<dd>begins critical section</dd>
<dt><a id="PI]" href="#PI]"><pre>PI]</pre></a></dt>
<dd>ceases critical section</dd>
<dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt>
<dd>begins distribute</dd>
<dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt>
<dd>ceases distribute</dd>
<dt><a id="PWy" href="#PWy"><pre>PWy</pre></a></dt>
<dd>begins dynamic for init</dd>
<dt><a id="PWY" href="#PWY"><pre>PWY</pre></a></dt>
<dd>ceases dynamic for init</dd>
<dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt>
<dd>begins dynamic for chunk</dd>
<dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt>
<dd>ceases dynamic for chunk</dd>
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
<dd>begins static for</dd>
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
<dd>ceases static for</dd>
<dt><a id="PWe" href="#PWe"><pre>PWe</pre></a></dt>
<dd>begins section</dd>
<dt><a id="PWE" href="#PWE"><pre>PWE</pre></a></dt>
<dd>ceases section</dd>
<dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt>
<dd>begins single</dd>
<dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt>
<dd>ceases single</dd>
<dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt>
<dd>begins task allocation</dd>
<dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt>
<dd>ceases task allocation</dd>
<dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt>
<dd>begins checking task dependencies</dd>
<dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt>
<dd>ceases checking task dependencies</dd>
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
<dd>begins duplicating a task</dd>
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
<dd>ceases duplicating a task</dd>
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
<dd>begins releasing task dependencies</dd>
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
<dd>ceases releasing task dependencies</dd>
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
<dd>begins running a task</dd>
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
<dd>ceases running a task</dd>
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
<dd>begins running an if0 task</dd>
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
<dd>ceases running an if0 task</dd>
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
<dd>begins scheduling a task</dd>
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
<dd>ceases scheduling a task</dd>
<dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt>
<dd>begins a taskgroup</dd>
<dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt>
<dd>ceases a taskgroup</dd>
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
<dd>begins a taskwait</dd>
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
<dd>ceases a taskwait</dd>
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
<dd>begins waiting for taskwait dependencies</dd>
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
<dd>ceases waiting for taskwait dependencies</dd>
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
<dd>begins a taskyield</dd>
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
<dd>ceases a taskyield</dd>
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
<dd>enters the attached state</dd>
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
<dd>leaves the attached state</dd>
<dt><a id="PMi" href="#PMi"><pre>PMi</pre></a></dt>
<dd>begins microtask internal</dd>
<dt><a id="PMI" href="#PMI"><pre>PMI</pre></a></dt>
<dd>ceases microtask internal</dd>
<dt><a id="PMu" href="#PMu"><pre>PMu</pre></a></dt>
<dd>begins microtask user code</dd>
<dt><a id="PMU" href="#PMU"><pre>PMU</pre></a></dt>
<dd>ceases microtask user code</dd>
<dt><a id="PH[" href="#PH["><pre>PH[</pre></a></dt>
<dd>begins worker loop</dd>
<dt><a id="PH]" href="#PH]"><pre>PH]</pre></a></dt>
<dd>ceases worker loop</dd>
<dt><a id="PCf" href="#PCf"><pre>PCf</pre></a></dt>
<dd>begins fork call</dd>
<dt><a id="PCF" href="#PCF"><pre>PCF</pre></a></dt>
<dd>ceases fork call</dd>
<dt><a id="PCi" href="#PCi"><pre>PCi</pre></a></dt>
<dd>begins initialization</dd>
<dt><a id="PCI" href="#PCI"><pre>PCI</pre></a></dt>
<dd>ceases initialization</dd>
</dl>
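
For reference, a begin/end pair such as `PBb`/`PBB` is emitted from the
instrumented runtime through the libovni API, roughly as in the following
sketch (assuming ovni and the calling thread are already initialized, which
OpenMP-V does on top of nOS-V):

```c
#include <ovni.h>

/* Emit one event with the given model-category-value (MCV) code. */
static void emit(const char *mcv)
{
	struct ovni_ev ev = {0};
	ovni_ev_set_clock(&ev, ovni_clock_now());
	ovni_ev_set_mcv(&ev, (char *) mcv);
	ovni_ev_emit(&ev);
}

/* A plain barrier would then be delimited like this: */
static void barrier_with_events(void)
{
	emit("PBb"); /* begins plain barrier */
	/* ... wait on the barrier ... */
	emit("PBB"); /* ceases plain barrier */
}
```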
## Model tampi ## Model tampi

Binary file not shown (new image, 9.4 KiB).


@ -1,164 +1,243 @@
# OpenMP model

The [OpenMP programming model](https://www.openmp.org) is a widely used API and
set of directives for parallel programming, allowing developers to write
multi-threaded and multi-process applications more easily. In this document we
refer to the
[version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf).

The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides
an implementation of the OpenMP specification as a component of the LLVM
compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top
of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the
[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**.

We have added instrumentation events to OpenMP-V designed to be enabled along
the [nOS-V instrumentation](nosv.md). This document describes all the
instrumentation features included in our modified OpenMP-V runtime to identify
what is happening. This data is useful for both users and developers of the
OpenMP runtime to analyze issues and undesired behaviors.

!!! Note

    Instrumenting the original OpenMP runtime from the LLVM project is planned
    but is not yet possible. For now you must use the modified OpenMP-V runtime
    with nOS-V.
## Enable the instrumentation

To generate runtime traces, you will have to:

1. **Build nOS-V with ovni support:** Refer to the
   [nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md).
   Typically you should use the `--with-ovni` option at configure time to
   specify where ovni is installed.
2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
   environment variable to specify the nOS-V and ovni installations when
   configuring CMake.
3. **Enable the instrumentation in nOS-V at runtime:** Refer to the
   [nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md)
   to find out how to enable the tracing at runtime. Typically you can just set
   `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment
   variable `OMP_OVNI=1`.
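
As an end-to-end sanity check, a minimal program can be built and traced as in
the sketch below (illustrative only: the file name is hypothetical and `clang`
must be the one from the OmpSs-2 LLVM build):

```c
/* check.c -- minimal program to verify that a trace is generated.
 *
 * Build and run (illustrative commands, adjust paths to your installation):
 *
 *   clang -fopenmp=libompv check.c -o check
 *   NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni" OMP_OVNI=1 ./check
 *
 * The trace is left in the ovni/ directory, ready to be emulated with ovniemu.
 */
#include <unistd.h>

int main(void)
{
	#pragma omp parallel
	usleep(1000); /* give every thread something measurable to do */
	return 0;
}
```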
Currently there is only support for the subsystem view, which is documented
below. The view is complemented with the information of [nOS-V views](nosv.md),
as OpenMP-V uses nOS-V tasks to run the workers.

## Subsystem view

![Subsystem view example](fig/openmp-subsystem.png)
This view illustrates the activities of each thread with different states (a
sketch mapping constructs to states follows the list):
- **Work-distribution subsystem**: Related to work-distribution constructs,
  [in Chapter 11][workdis].

    - **Distribute**: Running a *Distribute* region.
    - **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often
      involves running more than one iteration of the loop. See the
      [limitations](#dynamic-for) below.
    - **Dynamic for initialization**: Preparing a dynamic *for*.
    - **Static for chunk**: Executing the assigned iterations of a static
      *for*.
    - **Single**: Running a *Single* region. All threads of the parallel region
      participate.
    - **Section**: Running a *Section* region. All threads of the parallel
      region participate.

- **Task subsystem**: Related to tasking constructs, [in Chapter 12][tasking].

    - **Allocation**: Allocating the task descriptor.
    - **Check deps**: Checking if the task has pending dependencies to be
      fulfilled. When all dependencies are fulfilled the task will be
      scheduled.
    - **Duplicating**: Duplicating the task descriptor in a taskloop.
    - **Releasing deps**: Releasing dependencies at the end of a task. This
      state is always present even if the task has no dependencies.
    - **Running task**: Executing a task.
    - **Running task if0**: Executing a task if0.
    - **Scheduling**: Adding the task to the scheduler for execution.
    - **Taskgroup**: Waiting in a *taskgroup* construct.
    - **Taskwait**: Waiting in a *taskwait* construct.
    - **Taskwait deps**: Trying to execute tasks until dependencies have been
      fulfilled. This appears typically in a task if0 with dependencies or a
      taskwait with deps.
    - **Taskyield**: Performing a *taskyield* construct.

- **Critical subsystem**: Related to the *critical* construct, in
  [Section 15.2][critical].

    - **Acquiring**: Waiting to acquire a *Critical* section.
    - **Section**: Running the *Critical* section.
    - **Releasing**: Waiting to release a *Critical* section.

- **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier].
  **All barriers can try to execute tasks**.

    - **Barrier: Fork**: Workers wait for a release signal from the master
      thread to continue. The master can continue as soon as it signals the
      workers. It is done at the beginning of a fork-join region.
    - **Barrier: Join**: The master thread waits until all workers finish their
      work. Workers can continue as soon as they signal the master. It is done
      at the end of a fork-join region.
    - **Barrier: Plain**: Performing a plain barrier, which waits for a release
      signal from the master thread to continue. It is done at the beginning of
      a fork-join region, in the `__kmp_join_barrier()` function.
    - **Barrier: Task**: Blocked in an additional tasking barrier *until all
      previous tasks have been executed*. Only happens when executed with
      `KMP_TASKING=1`.

- **Runtime subsystem**: Internal operations of the runtime.

    - **Attached**: Present after the call to `nosv_attach()` and before
      `nosv_detach()`. This state is a hack.
    - **Fork call**: Preparing a parallel section using the fork-join model.
      Only called from the master thread.
    - **Init**: Initializing the OpenMP-V runtime.
    - **Internal microtask**: Running an internal OpenMP-V function as a
      microtask.
    - **User microtask**: Running user code as a microtask in a worker thread.
    - **Worker main loop**: Running the main loop, where the workers run the
      fork barrier, run a microtask and perform a join barrier until there is
      no more work.
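
As a rough illustration (a sketch, not one of the bundled tests), the following
program drives several of the states above; the mapping in the comments is
approximate:

```c
#include <unistd.h>

int main(void)
{
	/* Runtime: Init, then Fork call from the master thread */
	#pragma omp parallel
	{
		/* Critical: Acquiring, Section and Releasing */
		#pragma omp critical
		usleep(100);

		/* Barrier: Plain */
		#pragma omp barrier

		/* Work-distribution: Single */
		#pragma omp single
		{
			/* Task: Allocation, Scheduling and Running task */
			#pragma omp task
			usleep(100);
		}
	} /* Barrier: Join at the end of the parallel region */
	return 0;
}
```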
!!! Note

    The generated HTML version of the OpenMP 5.2 specification has some parts
    missing, so we link directly to the PDF file, which may not work in some
    browsers.

[workdis]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11
[tasking]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12
[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2
[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3
## Limitations
As the compiler generates the code that performs the calls to the OpenMP-V
runtime, there are some parts of the execution that are complicated to
instrument by just placing a pair of events to delimit a function.

For those cases we use an approximation, which is documented in the following
subsections.
### Dynamic for

The generated code of a *dynamic for* has the following structure:

```c
__kmpc_dispatch_init_4(...);
while (__kmpc_dispatch_next_4(...)) {
	for (i = ...; i <= ...; i++) {
		// User code ...
	}
}
```

The function `__kmpc_dispatch_next_4()` returns `true` if there are more
chunks (groups of iterations) to be executed by the thread, otherwise it
returns `false`.

Ideally we want to instrument each chunk with a pair of begin and end events.
The problem with the instrumentation is that there is no easy way of
determining if the call to `__kmpc_dispatch_next_4()` is processing the first
chunk, just after `__kmpc_dispatch_init_4()`, or is coming from other chunks
due to the while loop.

Therefore, from `__kmpc_dispatch_next_4()` alone, we cannot determine if we
need to only emit a single "begin a new chunk" event or we need to emit the
pair of events "finish the last chunk" and "begin a new one".

So, as a workaround, we emit an event from the end of
`__kmpc_dispatch_init_4()` starting a new chunk (which is fake), and then from
`__kmpc_dispatch_next_4()` we always emit the "finish the last chunk" and
"begin a new one" events (unless there are no more chunks, in which case we
don't emit the "begin a new one" event).

This will cause a spurious *Work-distribution: Dynamic for chunk* state at the
beginning of each dynamic for, which should be very short and is not really a
chunk.
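
In pseudo-C, the placement of the chunk events looks roughly like this (a
sketch of the placement only: `emit()` and `more_chunks` are placeholders, not
the actual runtime symbols):

```c
void __kmpc_dispatch_init_4(/* ... */)
{
	/* ... set up the dynamic for ... */
	emit("PWc"); /* begin the (fake) first chunk */
}

int __kmpc_dispatch_next_4(/* ... */)
{
	emit("PWC"); /* finish the last chunk */
	if (more_chunks) {
		emit("PWc"); /* begin a new one */
		return 1; /* the thread runs the next chunk */
	}
	return 0; /* no more chunks, don't begin a new one */
}
```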
### Static for
The generated code of a *static for* has the following structure:
```c
__kmpc_for_static_init_4(...);
for (i = ...; i <= ...; i++) {
	// User code ...
}
__kmpc_for_static_fini(...);
```
As this code is generated by the compiler, we cannot easily add the begin/end
pair of events to mark the *Work-distribution: Static for chunk* state.

We assume that placing the "begin processing a chunk" event at the end of
`__kmpc_for_static_init_4()` and the "end processing the chunk" event at the
beginning of `__kmpc_for_static_fini()` is equivalent to adding the events
surrounding the for loop.
### Task if0
The generated code of an *if0 task* has the following structure:
```c
... = __kmpc_omp_task_alloc(...);
__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
__kmpc_omp_task_begin_if0(...);
// Call to the user code
omp_task_entry_(...);
__kmpc_omp_task_complete_if0(...);
```
Instead of injecting the begin and end events in the user code, we
approximate it by placing the "begin if0 task" event at the end of the
`__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the
beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as
*Task: Running task if0*.


@ -7,11 +7,11 @@
        ]
      },
      "locked": {
        "lastModified": 1705310446,
        "narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=",
        "ref": "refs/heads/master",
        "rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f",
        "revCount": 932,
        "type": "git",
        "url": "https://git.sr.ht/~rodarima/bscpkgs"
      },


@ -7,11 +7,15 @@
  outputs = { self, nixpkgs, bscpkgs }:
  let
    # Set to true to replace all libovni in all runtimes with the current
    # source. Causes large rebuilds on changes of ovni.
    useLocalOvni = false;

    ovniOverlay = final: prev: {
      nosv = prev.nosv.override {
        useGit = true;
        gitBranch = "master";
        gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b";
      };
      nanos6 = prev.nanos6.override {
        useGit = true;
@ -23,13 +27,27 @@
gitBranch = "master"; gitBranch = "master";
gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f"; gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f";
}; };
clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
useGit = true;
gitBranch = "master";
gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886";
};
# Use a fixed commit for libovni
ovniFixed = prev.ovni.override {
useGit = true;
gitBranch = "master";
gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e";
};
# Build with the current source # Build with the current source
ovni = prev.ovni.overrideAttrs (old: rec { ovniLocal = prev.ovni.overrideAttrs (old: rec {
pname = "ovni-local"; pname = "ovni-local";
version = if self ? shortRev then self.shortRev else "dirty"; version = if self ? shortRev then self.shortRev else "dirty";
src = self; src = self;
cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ]; cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ];
}); });
# Select correct ovni for libovni
ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed;
}; };
pkgs = import nixpkgs { pkgs = import nixpkgs {
system = "x86_64-linux"; system = "x86_64-linux";
@ -51,12 +69,12 @@
    ];
    lib = pkgs.lib;
  in {
    packages.x86_64-linux.ovniPackages = {
      # Allow inspection of packages from the command line
      inherit pkgs;
    } // rec {
      # Build with the current source
      local = pkgs.ovniLocal;

      # Build in Debug mode
      debug = local.overrideAttrs (old: {
@ -97,12 +115,13 @@
        # We need to be able to exit the chroot to run Nanos6 tests, as they
        # require access to /sys for hwloc
        __noChroot = true;
        buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]);
        cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ];
        preConfigure = old.preConfigure or "" + ''
          export NOSV_HOME="${pkgs.nosv}"
          export NODES_HOME="${pkgs.nodes}"
          export NANOS6_HOME="${pkgs.nanos6}"
          export OPENMP_RUNTIME="libompv"
        '';
      });


@ -37,6 +37,7 @@ nav:
    - user/emulation/nanos6.md
    - user/emulation/tampi.md
    - user/emulation/mpi.md
    - user/emulation/openmp.md
    - user/emulation/events.md
  - CHANGELOG.md
  - 'Developer guide':


@ -1,4 +1,4 @@
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
 * SPDX-License-Identifier: GPL-3.0-or-later */

#include "openmp_priv.h"
@ -14,53 +14,83 @@
enum { PUSH = 1, POP = 2, IGN = 3 };

static const int fn_table[256][256][3] = {
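	/* Indexed by event category and value: each entry selects the channel
	 * to update, the stack action (PUSH, POP or IGN) and the state. */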
	['B'] = {
		['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN },
		['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER_PLAIN },
		['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN },
		['J'] = { CH_SUBSYSTEM, POP, ST_BARRIER_JOIN },
		['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK },
		['F'] = { CH_SUBSYSTEM, POP, ST_BARRIER_FORK },
		['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK },
		['T'] = { CH_SUBSYSTEM, POP, ST_BARRIER_TASK },
		['s'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
		['S'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
	},
	['I'] = {
		['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ },
		['A'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_ACQ },
		['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL },
		['R'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_REL },
		['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION },
		[']'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_SECTION },
	},
	['W'] = {
		['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE },
		['D'] = { CH_SUBSYSTEM, POP, ST_WD_DISTRIBUTE },
		['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK },
		['C'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_CHUNK },
		['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT },
		['Y'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_INIT },
		['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC },
		['S'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_STATIC },
		['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION },
		['E'] = { CH_SUBSYSTEM, POP, ST_WD_SECTION },
		['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE },
		['I'] = { CH_SUBSYSTEM, POP, ST_WD_SINGLE },
	},
	['T'] = {
		['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC },
		['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
		['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS },
		['C'] = { CH_SUBSYSTEM, POP, ST_TASK_CHECK_DEPS },
		['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC },
		['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
		['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS },
		['R'] = { CH_SUBSYSTEM, POP, ST_TASK_RELEASE_DEPS },
		['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN },
		[']'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN },
		['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 },
		['I'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN_IF0 },
		['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
		['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
		['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP },
		['G'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKGROUP },
		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT },
		['T'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT },
		['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS },
		['W'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT_DEPS },
		['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD },
		['Y'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKYIELD },
	},
	['A'] = {
		['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED },
		[']'] = { CH_SUBSYSTEM, POP, ST_RT_ATTACHED },
	},
	['M'] = {
		['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL },
		['I'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_INTERNAL },
		['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER },
		['U'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_USER },
	},
	['H'] = {
		['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP },
		[']'] = { CH_SUBSYSTEM, POP, ST_RT_WORKER_LOOP },
	},
	['C'] = {
		['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT },
		['I'] = { CH_SUBSYSTEM, POP, ST_RT_INIT },
		['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL },
		['F'] = { CH_SUBSYSTEM, POP, ST_RT_FORK_CALL },
	},
};


@ -1,4 +1,4 @@
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
 * SPDX-License-Identifier: GPL-3.0-or-later */

#ifndef OPENMP_PRIV_H
@ -15,28 +15,42 @@ enum openmp_chan {
	CH_MAX,
};

enum openmp_function_values {
	ST_BARRIER_FORK = 1,
	ST_BARRIER_JOIN,
	ST_BARRIER_PLAIN,
	ST_BARRIER_SPIN_WAIT,
	ST_BARRIER_TASK,
	/* Critical */
	ST_CRITICAL_ACQ,
	ST_CRITICAL_REL,
	ST_CRITICAL_SECTION,
	/* Work-distribution */
	ST_WD_DISTRIBUTE,
	ST_WD_FOR_DYNAMIC_CHUNK,
	ST_WD_FOR_DYNAMIC_INIT,
	ST_WD_FOR_STATIC,
	ST_WD_SECTION,
	ST_WD_SINGLE,
	/* Task */
	ST_TASK_ALLOC,
	ST_TASK_CHECK_DEPS,
	ST_TASK_DUP_ALLOC,
	ST_TASK_RELEASE_DEPS,
	ST_TASK_RUN,
	ST_TASK_RUN_IF0,
	ST_TASK_SCHEDULE,
	ST_TASK_TASKGROUP,
	ST_TASK_TASKWAIT,
	ST_TASK_TASKWAIT_DEPS,
	ST_TASK_TASKYIELD,
	/* Runtime */
	ST_RT_ATTACHED,
	ST_RT_FORK_CALL,
	ST_RT_INIT,
	ST_RT_MICROTASK_INTERNAL,
	ST_RT_MICROTASK_USER,
	ST_RT_WORKER_LOOP,
};

struct openmp_thread {


@ -26,29 +26,44 @@ static const char model_name[] = "openmp";
enum { model_id = 'P' };

static struct ev_decl model_evlist[] = {
PAIR_E("PA[", "PA]", "the attached state") PAIR_B("PBb", "PBB", "plain barrier")
PAIR_B("PBj", "PBJ", "join barrier")
PAIR_B("PBf", "PBF", "fork barrier")
PAIR_B("PBt", "PBT", "tasking barrier")
PAIR_B("PBs", "PBS", "spin wait")
PAIR_E("PBj", "PBJ", "a join barrier") PAIR_B("PIa", "PIA", "critical acquiring")
PAIR_E("PBb", "PBB", "a barrier") PAIR_B("PIr", "PIR", "critical releasing")
PAIR_E("PBt", "PBT", "a tasking barrier") PAIR_B("PI[", "PI]", "critical section")
PAIR_E("PBs", "PBS", "a spin wait")
PAIR_B("PWs", "PWS", "static for") PAIR_B("PWd", "PWD", "distribute")
PAIR_B("PWd", "PWD", "dynamic for init") PAIR_B("PWy", "PWY", "dynamic for init")
PAIR_B("PWc", "PWC", "dynamic for chunk") PAIR_B("PWc", "PWC", "dynamic for chunk")
PAIR_B("PWs", "PWS", "static for")
PAIR_B("PWe", "PWE", "section")
PAIR_B("PWi", "PWI", "single") PAIR_B("PWi", "PWI", "single")
PAIR_B("PTr", "PTR", "releasing task dependencies")
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
PAIR_B("PT[", "PT]", "invoking a task")
PAIR_B("PTi", "PTI", "invoking an if0 task")
PAIR_B("PTa", "PTA", "task allocation") PAIR_B("PTa", "PTA", "task allocation")
PAIR_B("PTs", "PTS", "scheduling a task")
PAIR_E("PTt", "PTT", "a taskwait")
PAIR_E("PTy", "PTY", "a taskyield")
PAIR_B("PTd", "PTD", "duplicating a task")
PAIR_B("PTc", "PTC", "checking task dependencies") PAIR_B("PTc", "PTC", "checking task dependencies")
PAIR_E("PTg", "PTG", "a taskgroup") PAIR_B("PTd", "PTD", "duplicating a task")
PAIR_B("PTr", "PTR", "releasing task dependencies")
PAIR_B("PT[", "PT]", "running a task")
PAIR_B("PTi", "PTI", "running an if0 task")
PAIR_B("PTs", "PTS", "scheduling a task")
PAIR_B("PTg", "PTG", "a taskgroup")
PAIR_B("PTt", "PTT", "a taskwait")
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
PAIR_B("PTy", "PTY", "a taskyield")
PAIR_E("PA[", "PA]", "the attached state")
PAIR_B("PMi", "PMI", "microtask internal")
PAIR_B("PMu", "PMU", "microtask user code")
PAIR_B("PH[", "PH]", "worker loop")
PAIR_B("PCf", "PCF", "fork call")
PAIR_B("PCi", "PCI", "initialization")
{ NULL, NULL }, { NULL, NULL },
}; };
@ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = {
	[CH_SUBSYSTEM] = 1,
};
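
/* Allow consecutive duplicate values in the subsystem channel; they are
 * also emitted to the PRV trace (see PRV_EMITDUP below). */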
static const int chan_dup[CH_MAX] = {
	[CH_SUBSYSTEM] = 1,
};
/* ----------------- pvt ------------------ */

static const int pvt_type[CH_MAX] = {
@ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = {
};

static const struct pcf_value_label openmp_subsystem_values[] = {
	/* Work-distribution */
	{ ST_WD_DISTRIBUTE, "Work-distribution: Distribute" },
	{ ST_WD_FOR_DYNAMIC_CHUNK, "Work-distribution: Dynamic for chunk" },
	{ ST_WD_FOR_DYNAMIC_INIT, "Work-distribution: Dynamic for initialization" },
	{ ST_WD_FOR_STATIC, "Work-distribution: Static for chunk" },
	{ ST_WD_SECTION, "Work-distribution: Section" },
	{ ST_WD_SINGLE, "Work-distribution: Single" },
	/* Task */
	{ ST_TASK_ALLOC, "Task: Allocation" },
	{ ST_TASK_CHECK_DEPS, "Task: Check deps" },
	{ ST_TASK_DUP_ALLOC, "Task: Duplicating" },
	{ ST_TASK_RELEASE_DEPS, "Task: Releasing deps" },
	{ ST_TASK_RUN, "Task: Running task" },
	{ ST_TASK_RUN_IF0, "Task: Running task if0" },
	{ ST_TASK_SCHEDULE, "Task: Scheduling" },
	{ ST_TASK_TASKGROUP, "Task: Taskgroup" },
	{ ST_TASK_TASKWAIT, "Task: Taskwait" },
	{ ST_TASK_TASKWAIT_DEPS, "Task: Taskwait deps" },
	{ ST_TASK_TASKYIELD, "Task: Taskyield" },
	/* Critical */
	{ ST_CRITICAL_ACQ, "Critical: Acquiring" },
	{ ST_CRITICAL_REL, "Critical: Releasing" },
	{ ST_CRITICAL_SECTION, "Critical: Section" },
	/* Barrier */
	{ ST_BARRIER_FORK, "Barrier: Fork" },
	{ ST_BARRIER_JOIN, "Barrier: Join" },
	{ ST_BARRIER_PLAIN, "Barrier: Plain" },
	{ ST_BARRIER_TASK, "Barrier: Task" },
	{ ST_BARRIER_SPIN_WAIT, "Barrier: Spin wait" },
	/* Runtime */
	{ ST_RT_ATTACHED, "Runtime: Attached" },
	{ ST_RT_FORK_CALL, "Runtime: Fork call" },
	{ ST_RT_INIT, "Runtime: Initialization" },
	{ ST_RT_MICROTASK_INTERNAL, "Runtime: Internal microtask" },
	{ ST_RT_MICROTASK_USER, "Runtime: User microtask" },
	{ ST_RT_WORKER_LOOP, "Runtime: Worker main loop" },
	{ -1, NULL },
};
@ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = {
};

static const long prv_flags[CH_MAX] = {
	[CH_SUBSYSTEM] = PRV_EMITDUP,
};

static const struct model_pvt_spec pvt_spec = {
@ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = {
/* ----------------- tracking ------------------ */

static const int th_track[CH_MAX] = {
	[CH_SUBSYSTEM] = TRACK_TH_ACT,
};

static const int cpu_track[CH_MAX] = {
@ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = {
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.ch_dup = chan_dup,
	.pvt = &pvt_spec,
	.track = th_track,
};
@ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = {
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.ch_dup = chan_dup,
	.pvt = &pvt_spec,
	.track = cpu_track,
};


@ -4,3 +4,4 @@
add_subdirectory(nanos6)
add_subdirectory(nodes)
add_subdirectory(nosv)
add_subdirectory(openmp)


@ -0,0 +1,50 @@
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
# SPDX-License-Identifier: GPL-3.0-or-later
check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND)
check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND)
cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH)
if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND)
  if(ENABLE_ALL_TESTS)
    message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests")
  else()
    message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests")
  endif()
  return()
endif()
function(openmp_rt_test)
  ovni_test(${ARGN})
  target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv"
    "-no-pedantic")
  target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv")
  target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m")
  set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
    ENVIRONMENT "OMP_OVNI=1")
  set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
    ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni")
endfunction()
openmp_rt_test(barrier-explicit.c)
openmp_rt_test(critical.c)
openmp_rt_test(if0-nested-task.c)
openmp_rt_test(if0.c)
openmp_rt_test(multi-parallels.c)
openmp_rt_test(parallel-for.c)
openmp_rt_test(parallel-loop.c)
openmp_rt_test(parallel-nested.c)
openmp_rt_test(parallel-task.c)
openmp_rt_test(sections.c)
openmp_rt_test(simple-task.c)
openmp_rt_test(task.c)
openmp_rt_test(taskloop.c)
openmp_rt_test(taskwait.c)
openmp_rt_test(team-distribute.c)
openmp_rt_test(worksharing-and-tasks.c)
openmp_rt_test(worksharing-mix.c)
openmp_rt_test(worksharing-task.c)
openmp_rt_test(worksharing.c)
openmp_rt_test(worksharing01.c)
openmp_rt_test(worksharing02.c)
openmp_rt_test(worksharing03.c)


@ -0,0 +1,47 @@
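/* Create tasks from a single construct, separate two explicit barriers with a
 * sleep, and repeat the task phase, exercising the barrier and tasking
 * subsystem states. */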
#include <stdio.h>
#include <math.h>
#include "compat.h"

#define N 100

static void
dummy_work(double *x, int i)
{
	sleep_us(i);
	x[i] += sqrt((double) i);
}

int main(void)
{
	double x[N] = { 0 };

	#pragma omp parallel
	{
		#pragma omp single
		{
			for (int i = 0; i < N; i++) {
				#pragma omp task shared(x)
				dummy_work(x, i);
			}
		}

		sleep_us(200);
		#pragma omp barrier
		sleep_us(1000);
		#pragma omp barrier

		#pragma omp single
		{
			for (int i = 0; i < N; i++) {
				#pragma omp task shared(x)
				dummy_work(x, i);
			}
		}
	}

	double sum = 0.0;
	for (int i = 0; i < N; i++)
		sum += x[i];

	printf("sum = %e\n", sum);
	return 0;
}

test/rt/openmp/critical.c

@ -0,0 +1,16 @@
#include "compat.h"
int main(void)
{
	#pragma omp parallel
	{
		sleep_us(1000);
		#pragma omp critical
		sleep_us(200);
		sleep_us(1000);
	}
	return 0;
}


@ -0,0 +1,20 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task if(0)
{
#pragma omp task
{
sleep_us(1000);
}
#pragma omp taskwait
}
}
return 0;
}

test/rt/openmp/if0.c

@ -0,0 +1,17 @@
#include <omp.h>
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task if(0)
{
sleep_us(1000);
}
}
return 0;
}


@ -0,0 +1,13 @@
#include "compat.h"
int main(void)
{
for (int i = 0; i < 10; i++) {
#pragma omp parallel
{
sleep_us(1000);
}
}
return 0;
}


@ -0,0 +1,29 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
}
return 0;
}


@ -0,0 +1,14 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp loop
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
}
return 0;
}


@ -0,0 +1,22 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
#pragma omp parallel
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
}
}
return 0;
}


@ -0,0 +1,26 @@
#include "compat.h"
static void foo(void)
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++)
sleep_us(i);
#pragma omp single
for (int i = 0; i < 100; i++)
{
#pragma omp task
sleep_us(10);
}
}
int main(void)
{
#pragma omp parallel
{
foo();
foo();
}
return 0;
}

test/rt/openmp/sections.c

@ -0,0 +1,51 @@
#include <stdio.h>
#include "compat.h"

int main(void)
{
	#pragma omp parallel sections
	{
		#pragma omp section
		{ sleep_us(1001); printf("1001\n"); }
		#pragma omp section
		{ sleep_us(1002); printf("1002\n"); }
		#pragma omp section
		{ sleep_us(1003); printf("1003\n"); }
		#pragma omp section
		{ sleep_us(1004); printf("1004\n"); }
		#pragma omp section
		sleep_us(1005);
		#pragma omp section
		sleep_us(1006);
		#pragma omp section
		sleep_us(1007);
		#pragma omp section
		sleep_us(1008);
		#pragma omp section
		sleep_us(1009);
		#pragma omp section
		sleep_us(1010);
		#pragma omp section
		sleep_us(1011);
		#pragma omp section
		sleep_us(1012);
		#pragma omp section
		sleep_us(1013);
		#pragma omp section
		sleep_us(1014);
		#pragma omp section
		sleep_us(1015);
		#pragma omp section
		sleep_us(1016);
		#pragma omp section
		sleep_us(1017);
		#pragma omp section
		sleep_us(1018);
		#pragma omp section
		sleep_us(1019);
		#pragma omp section
		sleep_us(1020);
	}
	return 0;
}


@ -0,0 +1,23 @@
#include "compat.h"
int main(void)
{
int a;
int *p = &a;
#pragma omp parallel
#pragma omp single
{
#pragma omp task depend(out : p[0])
{
sleep_us(1000);
}
for (int i = 0; i < 10000; i++)
{
#pragma omp task depend(in : p[0])
sleep_us(1);
}
}
return 0;
}

test/rt/openmp/task.c

@ -0,0 +1,19 @@
#include <stdio.h>
#include "compat.h"

int main(void)
{
	#pragma omp parallel
	{
		for (int i = 0; i < 10; i++) {
			#pragma omp task
			{
				printf("%d\n", i);
				sleep_us(100);
			}
		}
		#pragma omp barrier
	}
	return 0;
}

test/rt/openmp/taskloop.c

@ -0,0 +1,17 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp taskloop
for (int i = 0; i < 10000; i++)
{
#pragma omp task
sleep_us(1);
}
}
return 0;
}

test/rt/openmp/taskwait.c

@ -0,0 +1,42 @@
#include "compat.h"
#include <stdio.h>
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task label("A")
{
sleep_us(5000);
printf("A\n");
}
#pragma omp task label("B")
{
#pragma omp task label("B1")
{
sleep_us(2000);
printf("B1\n");
}
/* Shouldn't wait for task A */
#pragma omp taskwait
#pragma omp task
{
sleep_us(1000);
printf("B2\n");
}
}
#pragma omp task label("C")
{
printf("C\n");
}
}
/* Expected output C B1 B2 A */
return 0;
}


@ -0,0 +1,14 @@
#include <omp.h>
#include "compat.h"
int main(void)
{
#pragma omp teams num_teams(2)
{
#pragma omp distribute parallel for
for (volatile int i = 0; i < 1000; i++)
sleep_us(100 + i);
}
return 0;
}


@ -0,0 +1,34 @@
#include <stdio.h>
#include "compat.h"

int main(void)
{
	#pragma omp parallel
	{
		//#pragma omp single nowait
		for (int i = 0; i < 100; i++) {
			#pragma omp task
			sleep_us(10);
		}

		/* Wait a bit for task allocation */
		sleep_us(1000);

		/* Occupy 4 CPUs with sections */
		#pragma omp sections nowait
		{
			#pragma omp section
			{ sleep_us(1001); printf("1001\n"); }
			#pragma omp section
			{ sleep_us(1002); printf("1002\n"); }
			#pragma omp section
			{ sleep_us(1003); printf("1003\n"); }
			#pragma omp section
			{ sleep_us(1004); printf("1004\n"); }
		}
		#pragma omp taskwait
	}
	return 0;
}


@ -0,0 +1,66 @@
#include <omp.h>
#include <stdio.h>
#include "compat.h"

/* Test several work-distribution and task constructs, so we can generate a
 * trace that includes most of the states. */

int main(void)
{
	#pragma omp parallel
	{
		#pragma omp for
		for (int i = 0; i < 100; i++) {
			sleep_us(1);
		}
		#pragma omp sections
		{
			#pragma omp section
			{ sleep_us(101); printf("101\n"); }
			#pragma omp section
			{ sleep_us(102); printf("102\n"); }
			#pragma omp section
			{ sleep_us(103); printf("103\n"); }
			#pragma omp section
			{ sleep_us(104); printf("104\n"); }
		}
		#pragma omp for
		for (int i = 0; i < 100; i++) {
			sleep_us(1);
		}
		#pragma omp single
		for (int i = 0; i < 100; i++)
		{
			#pragma omp task
			sleep_us(10);
		}
	}

	#pragma omp parallel
	{
		#pragma omp critical
		sleep_us(20);
		#pragma omp barrier
		#pragma omp for
		for (int i = 0; i < 100; i++) {
			sleep_us(1);
		}
		#pragma omp for schedule(dynamic, 1)
		for (int i = 0; i < 100; i++) {
			sleep_us(i);
		}
	}

	// FIXME: Crashes OpenMP-V runtime
	//#pragma omp distribute parallel for
	//for (int i = 0; i < 1000; i++) {
	//	sleep_us(1);
	//}

	return 0;
}


@ -0,0 +1,25 @@
#include <omp.h>
#include "compat.h"

static void foo(void)
{
	#pragma omp for
	for (int i = 0; i < 100; ++i)
	{
		#pragma omp task
		{
			sleep_us(1);
		}
	}
}

int main(void)
{
	#pragma omp parallel
	{
		foo();
		foo();
	}
	return 0;
}


@ -0,0 +1,16 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for schedule(dynamic) ordered label("omp for dynamic")
for (int i = 0; i < 100; i++)
sleep_us(100);
#pragma omp single label("single")
sleep_us(1000);
}
return 0;
}


@ -0,0 +1,11 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp for schedule(static, 30)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}


@ -0,0 +1,12 @@
#include "compat.h"
int main(void)
{
#pragma omp target
#pragma omp teams num_teams(1)
#pragma omp distribute dist_schedule(static, 30)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}


@ -0,0 +1,11 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp for schedule(dynamic)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}