Update OpenMP emulation model

Add more tests, subsystem states and documentation.
Rodrigo Arias 2024-01-16 12:21:16 +01:00
parent bf2b3b73a0
commit 55318d9da7
36 changed files with 1116 additions and 287 deletions

View File

@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems
and only works with the OpenMP-V runtime, on top of nOS-V).
### Changed
- Add support for the `nosv_attach` and `nosv_detach` events `VA{aAeE}`.

View File

@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_custom_color_enabled true
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true

View File

@ -4,9 +4,9 @@ ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread >
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread >
################################################################################
window_name Thread: OpenMP subsystem of the RUNNING thread
window_name Thread: OpenMP subsystem of the ACTIVE thread
window_type single
window_id 1
window_position_x 0
@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_custom_color_enabled true
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
@ -38,5 +40,5 @@ window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 50
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread"
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread"

View File

@ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
<dl>
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
<dd>enters the attached state</dd>
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
<dd>leaves the attached state</dd>
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
<dd>enters a join barrier</dd>
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
<dd>leaves a join barrier</dd>
<dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt>
<dd>enters a barrier</dd>
<dd>begins plain barrier</dd>
<dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt>
<dd>leaves a barrier</dd>
<dd>ceases plain barrier</dd>
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
<dd>begins join barrier</dd>
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
<dd>ceases join barrier</dd>
<dt><a id="PBf" href="#PBf"><pre>PBf</pre></a></dt>
<dd>begins fork barrier</dd>
<dt><a id="PBF" href="#PBF"><pre>PBF</pre></a></dt>
<dd>ceases fork barrier</dd>
<dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt>
<dd>enters a tasking barrier</dd>
<dd>begins tasking barrier</dd>
<dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt>
<dd>leaves a tasking barrier</dd>
<dd>ceases tasking barrier</dd>
<dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt>
<dd>enters a spin wait</dd>
<dd>begins spin wait</dd>
<dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt>
<dd>leaves a spin wait</dd>
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
<dd>begins static for</dd>
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
<dd>ceases static for</dd>
<dd>ceases spin wait</dd>
<dt><a id="PIa" href="#PIa"><pre>PIa</pre></a></dt>
<dd>begins critical acquiring</dd>
<dt><a id="PIA" href="#PIA"><pre>PIA</pre></a></dt>
<dd>ceases critical acquiring</dd>
<dt><a id="PIr" href="#PIr"><pre>PIr</pre></a></dt>
<dd>begins critical releasing</dd>
<dt><a id="PIR" href="#PIR"><pre>PIR</pre></a></dt>
<dd>ceases critical releasing</dd>
<dt><a id="PI[" href="#PI["><pre>PI[</pre></a></dt>
<dd>begins critical section</dd>
<dt><a id="PI]" href="#PI]"><pre>PI]</pre></a></dt>
<dd>ceases critical section</dd>
<dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt>
<dd>begins dynamic for init</dd>
<dd>begins distribute</dd>
<dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt>
<dd>ceases distribute</dd>
<dt><a id="PWy" href="#PWy"><pre>PWy</pre></a></dt>
<dd>begins dynamic for init</dd>
<dt><a id="PWY" href="#PWY"><pre>PWY</pre></a></dt>
<dd>ceases dynamic for init</dd>
<dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt>
<dd>begins dynamic for chunk</dd>
<dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt>
<dd>ceases dynamic for chunk</dd>
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
<dd>begins static for</dd>
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
<dd>ceases static for</dd>
<dt><a id="PWe" href="#PWe"><pre>PWe</pre></a></dt>
<dd>begins section</dd>
<dt><a id="PWE" href="#PWE"><pre>PWE</pre></a></dt>
<dd>ceases section</dd>
<dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt>
<dd>begins single</dd>
<dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt>
<dd>ceases single</dd>
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
<dd>begins releasing task dependencies</dd>
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
<dd>ceases releasing task dependencies</dd>
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
<dd>begins waiting for taskwait dependencies</dd>
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
<dd>ceases waiting for taskwait dependencies</dd>
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
<dd>begins invoking a task</dd>
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
<dd>ceases invoking a task</dd>
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
<dd>begins invoking an if0 task</dd>
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
<dd>ceases invoking an if0 task</dd>
<dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt>
<dd>begins task allocation</dd>
<dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt>
<dd>ceases task allocation</dd>
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
<dd>begins scheduling a task</dd>
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
<dd>ceases scheduling a task</dd>
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
<dd>enters a taskwait</dd>
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
<dd>leaves a taskwait</dd>
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
<dd>enters a taskyield</dd>
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
<dd>leaves a taskyield</dd>
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
<dd>begins duplicating a task</dd>
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
<dd>ceases duplicating a task</dd>
<dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt>
<dd>begins checking task dependencies</dd>
<dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt>
<dd>ceases checking task dependencies</dd>
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
<dd>begins duplicating a task</dd>
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
<dd>ceases duplicating a task</dd>
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
<dd>begins releasing task dependencies</dd>
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
<dd>ceases releasing task dependencies</dd>
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
<dd>begins running a task</dd>
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
<dd>ceases running a task</dd>
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
<dd>begins running an if0 task</dd>
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
<dd>ceases running an if0 task</dd>
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
<dd>begins scheduling a task</dd>
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
<dd>ceases scheduling a task</dd>
<dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt>
<dd>enters a taskgroup</dd>
<dd>begins a taskgroup</dd>
<dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt>
<dd>leaves a taskgroup</dd>
<dd>ceases a taskgroup</dd>
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
<dd>begins a taskwait</dd>
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
<dd>ceases a taskwait</dd>
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
<dd>begins waiting for taskwait dependencies</dd>
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
<dd>ceases waiting for taskwait dependencies</dd>
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
<dd>begins a taskyield</dd>
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
<dd>ceases a taskyield</dd>
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
<dd>enters the attached state</dd>
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
<dd>leaves the attached state</dd>
<dt><a id="PMi" href="#PMi"><pre>PMi</pre></a></dt>
<dd>begins microtask internal</dd>
<dt><a id="PMI" href="#PMI"><pre>PMI</pre></a></dt>
<dd>ceases microtask internal</dd>
<dt><a id="PMu" href="#PMu"><pre>PMu</pre></a></dt>
<dd>begins microtask user code</dd>
<dt><a id="PMU" href="#PMU"><pre>PMU</pre></a></dt>
<dd>ceases microtask user code</dd>
<dt><a id="PH[" href="#PH["><pre>PH[</pre></a></dt>
<dd>begins worker loop</dd>
<dt><a id="PH]" href="#PH]"><pre>PH]</pre></a></dt>
<dd>ceases worker loop</dd>
<dt><a id="PCf" href="#PCf"><pre>PCf</pre></a></dt>
<dd>begins fork call</dd>
<dt><a id="PCF" href="#PCF"><pre>PCF</pre></a></dt>
<dd>ceases fork call</dd>
<dt><a id="PCi" href="#PCi"><pre>PCi</pre></a></dt>
<dd>begins initialization</dd>
<dt><a id="PCI" href="#PCI"><pre>PCI</pre></a></dt>
<dd>ceases initialization</dd>
</dl>
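
As a rough illustration of how a runtime would emit one of these begin/end pairs
through libovni (a sketch, not OpenMP-V code; the `emit()` helper and
`run_task()` wrapper are illustrative, and the thread is assumed to be already
initialized for tracing as libovni requires):

```c
#include <ovni.h>

/* Sketch: emit a begin/end pair for "running a task" (PT[ / PT]).
 * Assumes ovni_proc_init()/ovni_thread_init() already ran for this
 * thread. */
static void emit(const char *mcv)
{
	struct ovni_ev ev = {0};

	ovni_ev_set_clock(&ev, ovni_clock_now());
	ovni_ev_set_mcv(&ev, mcv);
	ovni_ev_emit(&ev);
}

static void run_task(void (*func)(void *), void *arg)
{
	emit("PT["); /* begins running a task */
	func(arg);
	emit("PT]"); /* ceases running a task */
}
```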
## Model tampi

Binary file not shown (new image, 9.4 KiB).

View File

@ -1,164 +1,243 @@
# OpenMP Model
# OpenMP model
The LLVM OpenMP Runtime is an integral component of the LLVM compiler
infrastructure that provides support for the OpenMP (Open Multi-Processing)
programming model.
The [OpenMP programming model](https://www.openmp.org) is a widely used API and
set of directives for parallel programming, allowing developers to write
multi-threaded and multi-process applications more easily. In this document we
refer to the
[version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf).
OpenMP is a widely used API and set of directives for parallel programming,
allowing developers to write multi-threaded and multi-process applications more
easily.
The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides
an implementation of the OpenMP specification as a component of the LLVM
compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top
of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the
[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**.
This documentation is about an OpenMP runtime built on top of [nOS-V][nosv],
leveraging its thread management capabilities while retaining the fundamental
characteristics of the original runtime.
We have added instrumentation events to OpenMP-V designed to be enabled
alongside the [nOS-V instrumentation](nosv.md). This document describes all the
instrumentation features included in our modified OpenMP-V runtime to identify
what is happening. This data is useful for both users and developers of the
OpenMP runtime to analyze issues and undesired behaviors.
While the modifications introduced to the runtime may appear to be minor, it's
important to note that this enhanced version is not API compatible with the
original runtime. As a result, it is mandatory to use the clang built from the
same [LLVM Project][llvm].
!!! Note
This document describes all the instrumentation features included in the runtime
by both nOS-V and OpenMP to monitor task execution and the execution flow within
the runtime library. This data is useful for both users and developers of the
OpenMP runtime to analyze issues and undesired behaviors.
Instrumenting the original OpenMP runtime from the LLVM project is planned
but is not yet possible. For now you must use the modified OpenMP-V runtime
with nOS-V.
[llvm]: https://pm.bsc.es/llvm-ompss
[nosv]: https://gitlab.bsc.es/nos-v/nos-v
## Enable the instrumentation
## How to Generate Execution Traces
To generate runtime traces, you will have to:
In order to build the OpenMP runtime, nOS-V must be provided via the
`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a
runtime without instrumentation. However, the user can still generate execution
traces by enabling the nOS-V instrumentation through
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this requires a
nOS-V installation built with ovni.
1. **Build nOS-V with ovni support:** Refer to the
[nOS-V
documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md).
Typically you should use the `--with-ovni` option at configure time to specify
where ovni is installed.
2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
environment variable to specify the nOS-V and ovni installation
when configuring CMake.
3. **Enable the instrumentation in nOS-V at runtime:** Refer to the
[nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md)
to find out how to enable the tracing at runtime. Typically you can just set
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment
variable `OMP_OVNI=1`.
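
For example, once steps 1 and 2 are done, a trivial program like the following
sketch can be compiled and run as described in steps 3 and 4 (the file name and
the compiler invocation in the comments are illustrative; the exact paths depend
on your installation):

```c
#include <stdio.h>

/* Sketch: hello.c, a minimal program to produce a first trace.
 * Build (illustrative): clang -fopenmp=libompv hello.c -o hello
 * Run:   OMP_OVNI=1 \
 *        NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni" ./hello
 * The trace is written to the "ovni" directory in the working
 * directory. */
int main(void)
{
	#pragma omp parallel
	printf("hello from one of the threads\n");

	return 0;
}
```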
Building OpenMP with instrumentation requires passing the ovni pkg-config path
in `PKG_CONFIG_PATH`, together with a nOS-V installation compiled with ovni.
This is because OpenMP depends on nOS-V to generate complete execution traces.
Currently there is only support for the subsystem view, which is documented
below. The view is complemented by the information in the [nOS-V views](nosv.md),
as OpenMP-V uses nOS-V tasks to run the workers.
By default, OpenMP will not instrument anything. To enable instrumentation the
user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
## Subsystem view
The following sections will describe the OpenMP execution trace views and what
information is shown there.
## nOS-V Task Type
As said in the previous sections, this OpenMP runtime is built on top of nOS-V,
so the user can also explore what the execution does there. Here we only
describe the task type view. For other views please take a look at the nOS-V
chapter.
In OpenMP, every thread that is launched (main thread included) is shown with a
task type labeled "openmp". In a task application, every task call is shown with
a task type labeled in "file:line:col" format, referring to the pragma location.
This can be changed by using the clause `label(string-literal)`.
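
For example, in the style of the tests added in this commit (`compat.h` provides
`sleep_us()`), the following task would appear with the task type label "init"
instead of the default pragma location:

```c
#include "compat.h"

int main(void)
{
	#pragma omp parallel
	#pragma omp single
	{
		/* Task type label "init" instead of "file:line:col" */
		#pragma omp task label("init")
		sleep_us(1000);
	}
	return 0;
}
```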
OpenMP task if0 is not shown here; take a look at the "Limitations" section for
more information. Nevertheless, the OpenMP task view shows it.
## OpenMP Subsystem
![Subsystem view example](fig/openmp-subsystem.png)
This view illustrates the activities of each thread with different states:
- **Attached**: The thread is attached.
- **Work-distribution subsystem**: Related to work-distribution constructs,
[in Chapter 11][workdis].
- **Join barrier**: The thread is in the implicit barrier of the parallel region.
- **Distribute**: Running a *Distribute* region.
- **Tasking barrier**: The thread is in the additional tasking barrier trying to
execute tasks. This event happens if executed with `KMP_TASKING=1`.
- **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often
involve running more than one iteration of the loop. See the
[limitations](#dynamic_for) below.
- **Spin wait**: The thread spin waits for a condition. Usually this event happens
in a barrier while waiting for the other threads to reach the barrier. The thread
also tries to execute tasks.
- **Dynamic for initialization**: Preparing a dynamic *for*.
- **For static**: Executing a for static. The length of the event represents all the
chunks of iterations executed by the thread. See "Limitations" section.
- **Static for chunk**: Executing the assigned iterations of a static
*for*.
- **For dynamic init**: Running the initialization of an OpenMP for dynamic.
- **Single**: Running a *Single* region. All threads of the parallel region
participate.
- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic.
To clarify: if a thread executes two chunks of iterations, say from 1 to 4 and
from 8 to 12, two different events will be shown. See "Limitations" section.
- **Section**: Running a *Section* region. All threads of the parallel region
participate.
- **Single**: Running a Single region. All threads of the parallel region will emit
the event.
- **Task subsystem**: Related to tasking constructs, [in Chapter 12][tasking].
- **Release deps**: When finishing a task, trying to release dependencies. This
event happens even if the task has no dependencies.
- **Allocation**: Allocating the task descriptor.
- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled.
This appears typically in a task if0 with dependencies or a taskwait with deps.
- **Check deps**: Checking if the task has pending dependencies to be
fulfilled. When all dependencies are fulfilled the task will be scheduled.
- **Invoke task**: Executing a task.
- **Duplicating**: Duplicating the task descriptor in a taskloop.
- **Invoke task if0**: Executing a task if0.
- **Releasing deps**: Releasing dependencies at the end of a task. This
state is always present even if the task has no dependencies.
- **Task alloc**: Allocating the task descriptor.
- **Running task**: Executing a task.
- **Task schedule**: Adding the task to the scheduler.
- **Running task if0**: Executing a task if0.
- **Taskwait**: Running a taskwait.
- **Scheduling**: Adding the task to the scheduler for execution.
- **Taskyield**: Running a taskyield.
- **Taskgroup**: Waiting in a *taskgroup* construct.
- **Task dup alloc**: Duplicating the task descriptor in a taskloop.
- **Taskwait**: Waiting in a *taskwait* construct.
- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This
means that if all dependencies are fulfilled the task will be scheduled.
- **Taskwait deps**: Trying to execute tasks until dependencies have been
fulfilled. This appears typically in a task if0 with dependencies or a
taskwait with deps.
- **Taskgroup**: Running a taskgroup.
- **Taskyield**: Performing a *taskyield* construct.
- **Critical subsystem**: Related to the *critical* construct, in [Section 15.2][critical].
- **Acquiring**: Waiting to acquire a *Critical* section.
- **Section**: Running the *Critical* section.
- **Releasing**: Waiting to release a *Critical* section.
- **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier].
**All barriers can try to execute tasks**.
- **Barrier: Fork**: Workers wait for a release signal from the master thread to
continue. The master can continue as soon as it signals the workers. It is
done at the beginning of a fork-join region.
- **Barrier: Join**: The master thread waits until all workers finish their work.
Workers can continue as soon as they signal the master. It is done at the
end of a fork-join region.
- **Barrier: Plain**: Performing a plain barrier, which waits for a release
signal from the master thread to continue. It is done at the beginning of
a fork-join region, in the `__kmp_join_barrier()` function.
- **Barrier: Task**: Blocked in an additional tasking barrier *until all previous
tasks have been executed*. Only happens when executed with `KMP_TASKING=1`.
- **Runtime subsystem**: Internal operations of the runtime.
- **Attached**: Present after the call to `nosv_attach()` and before
`nosv_detach()`. This state is a hack.
- **Fork call**: Preparing a parallel section using the fork-join model.
Only called from the master thread.
- **Init**: Initializing the OpenMP-V runtime.
- **Internal microtask**: Running an internal OpenMP-V function as a microtask.
- **User microtask**: Running user code as a microtask in a worker thread.
- **Worker main loop**: Running the main loop, where the workers run the
fork barrier, run a microtask and perform a join barrier until there is no
more work.
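
As a rough, illustrative mapping between constructs and the states above (a
sketch in the style of the tests added in this commit, which rely on `compat.h`
for `sleep_us()`; the comments name the states each region is expected to show):

```c
#include "compat.h"

int main(void)
{
	#pragma omp parallel /* master: "Runtime: Fork call" */
	{
		#pragma omp for schedule(dynamic, 1) /* "Dynamic for init/chunk" */
		for (int i = 0; i < 100; i++)
			sleep_us(1);

		#pragma omp critical /* "Critical: Acquiring/Section/Releasing" */
		sleep_us(10);

		#pragma omp single /* "Work-distribution: Single" */
		{
			#pragma omp task /* alloc + schedule, then run in a worker */
			sleep_us(100);

			#pragma omp taskwait /* "Task: Taskwait" */
		}
	} /* join/fork barriers at the region boundaries */
	return 0;
}
```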
!!! Note
The generated HTML version of the OpenMP 5.2 specification has some parts
missing, so we link directly to the PDF file which may not work in some
browsers.
[workdis]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11
[tasking]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12
[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2
[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3
## Limitations
Due to the way OpenMP is implemented, there are some instrumentation points that
violate ovni subsystem rules. This mostly happens because some directives are lowered
partially in the transformed user code, so it is not easy to wrap them into a
single-entry single-exit (SESE) region, like we would do with a regular task invocation,
for example.
As the compiler generates the code that performs the calls to the OpenMP-V
runtime, there are some parts of the execution that are complicated to
instrument by just placing a pair of events to delimit a function.
All problematic directives are described here so the user is able to understand
what is being shown in the traces.
For those cases we use an approximation which is documented in the following
subsections.
- **Task if0**: The lowered user code of a task if0 is:

    ... = __kmpc_omp_task_alloc(...);
    __kmpc_omp_taskwait_deps_51(...); // If task has dependencies
    __kmpc_omp_task_begin_if0(...);
    // Call to the user code
    omp_task_entry_(...);
    __kmpc_omp_task_complete_if0(...);
### Dynamic for
Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As
this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0`
and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent.
The generated code of a *dynamic for* has the following structure:
- **For static**: The lowered user code of a for static is:

    // Parallel code
    __kmpc_for_static_init_4(...);
    for (i = ...; i <= ...; ++i)
        ;
    __kmpc_for_static_fini(...);
Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4`
and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent.
- **For dynamic**: The lowered user code of a for dynamic is:

    __kmpc_dispatch_init_4(...);
    while (__kmpc_dispatch_next_4(...))
    {
        for (i = ...; i <= ...; ++i)
            ;
    }
```c
__kmpc_dispatch_init_4(...);
while (__kmpc_dispatch_next_4(...)) {
for (i = ...; i <= ...; i++) {
// User code ...
}
}
```
Ideally, the for loop should be called by the runtime to ensure the SESE
structure. As this code is generated by the compiler, the subsystem view shows:
1. How long it takes to run `__kmpc_dispatch_init_4`, with the event **For dynamic init**.
2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`,
with the event **For dynamic chunk**.
3. How long it takes to run a loop iteration chunk between the previous and the last
`__kmpc_dispatch_next_4` call, with the event **For dynamic chunk**.
The function `__kmpc_dispatch_next_4()` returns `true` if there are more
chunks (group of iterations) to be executed by the thread, otherwise it returns
`false`.
Ideally we want to instrument each chunk with a pair of begin and end events.
The problem with the instrumentation is that there is no easy way of determining
if the call to `__kmpc_dispatch_next_4()` is processing the first chunk, just
after `__kmpc_dispatch_init_4()`, or is coming from other chunks due to the
while loop.
Therefore, from the `__kmpc_dispatch_next_4()` alone, we cannot determine if we
need to only emit a single "begin a new chunk" event or we need to emit the pair
of events "finish the last chunk" and "begin a new one".
So, as a workaround, we emit an event from the end of `__kmpc_dispatch_init_4()`
starting a new chunk (which is fake), and then from `__kmpc_dispatch_next_4()` we
always emit the "finish the last chunk" and "begin a new one" events (unless
there are no more chunks, in which case we don't emit the "begin a new one"
event).
This will cause a spurious *Work-distribution: Dynamic for chunk* state at the
beginning of each dynamic for, which should be very short and is not really a
chunk.
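
In pseudo-C, the workaround places the events roughly like this (a runnable
sketch, not the actual OpenMP-V sources; `dispatch_init`/`dispatch_next` stand
in for the `__kmpc_dispatch_*` functions and the `instr_*()` helpers are
hypothetical stand-ins for the code emitting the `PWc`/`PWC` events):

```c
#include <stdio.h>

static void instr_begin_chunk(void) { puts("PWc"); } /* hypothetical */
static void instr_end_chunk(void)   { puts("PWC"); } /* hypothetical */

static int chunks_left = 3; /* stand-in for the scheduler state */

static void dispatch_init(void)
{
	/* ... set up the dynamic for ... */
	instr_begin_chunk(); /* fake chunk, makes dispatch_next uniform */
}

static int dispatch_next(void)
{
	instr_end_chunk(); /* always finish the last chunk */
	if (chunks_left-- > 0) {
		instr_begin_chunk(); /* begin a new one */
		return 1;
	}
	return 0; /* no more chunks: no "begin" event */
}

int main(void)
{
	dispatch_init();
	while (dispatch_next())
		; /* user iterations would run here */
	return 0;
}
```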
### Static for
The generated code of a *static for* has the following structure:
```c
__kmpc_for_static_init_4(...);
for (i = ...; i <= ...; i++) {
// User code ...
}
__kmpc_for_static_fini(...);
```
As this code is generated by the compiler we cannot easily add the begin/end
pair of events to mark the *Work-distribution: Static for chunk* state.
We assume that placing the "begin processing a chunk" event at the end of
`__kmpc_for_static_init_4()` and the "end processing the chunk" event at
the beginning of `__kmpc_for_static_fini()` is equivalent to adding the
events surrounding the for loop.
### Task if0
The generated code of an *if0 task* has the following structure:
```c
... = __kmpc_omp_task_alloc(...);
__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
__kmpc_omp_task_begin_if0(...);
// Call to the user code
omp_task_entry_(...);
__kmpc_omp_task_complete_if0(...);
```
Instead of injecting the begin and end events in the user code, we
approximate it by placing the "begin if0 task" event at the end of the
`__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the
beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as
*Task: Running task if0*.

View File

@ -7,11 +7,11 @@
]
},
"locked": {
"lastModified": 1701968480,
"narHash": "sha256-YoKN8FZllNQfpEpMqGOBv77kp9J0mlVRlhixWbcDqWg=",
"lastModified": 1705310446,
"narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=",
"ref": "refs/heads/master",
"rev": "c4d5135fde108401417fdcdf5e1c8d11aeca4f32",
"revCount": 931,
"rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f",
"revCount": 932,
"type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs"
},

View File

@ -7,11 +7,15 @@
outputs = { self, nixpkgs, bscpkgs }:
let
# Set to true to replace all libovni in all runtimes with the current
# source. Causes large rebuilds on changes of ovni.
useLocalOvni = false;
ovniOverlay = final: prev: {
nosv = prev.nosv.override {
useGit = true;
gitBranch = "master";
gitCommit = "6a63fd4378ba458243dda3159500c1450edf0e82";
gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b";
};
nanos6 = prev.nanos6.override {
useGit = true;
@ -23,13 +27,27 @@
gitBranch = "master";
gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f";
};
clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
useGit = true;
gitBranch = "master";
gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886";
};
# Use a fixed commit for libovni
ovniFixed = prev.ovni.override {
useGit = true;
gitBranch = "master";
gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e";
};
# Build with the current source
ovni = prev.ovni.overrideAttrs (old: rec {
ovniLocal = prev.ovni.overrideAttrs (old: rec {
pname = "ovni-local";
version = if self ? shortRev then self.shortRev else "dirty";
src = self;
cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ];
});
# Select correct ovni for libovni
ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed;
};
pkgs = import nixpkgs {
system = "x86_64-linux";
@ -51,12 +69,12 @@
];
lib = pkgs.lib;
in {
packages.x86_64-linux.ovniPackages = rec {
packages.x86_64-linux.ovniPackages = {
# Allow inspection of packages from the command line
inherit pkgs;
} // rec {
# Build with the current source
local = pkgs.ovni.overrideAttrs (old: {
pname = "ovni-local";
src = self;
});
local = pkgs.ovniLocal;
# Build in Debug mode
debug = local.overrideAttrs (old: {
@ -97,12 +115,13 @@
# We need to be able to exit the chroot to run Nanos6 tests, as they
# require access to /sys for hwloc
__noChroot = true;
buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes ]);
buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]);
cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ];
preConfigure = old.preConfigure or "" + ''
export NOSV_HOME="${pkgs.nosv}"
export NODES_HOME="${pkgs.nodes}"
export NANOS6_HOME="${pkgs.nanos6}"
export OPENMP_RUNTIME="libompv"
'';
});

View File

@ -37,6 +37,7 @@ nav:
- user/emulation/nanos6.md
- user/emulation/tampi.md
- user/emulation/mpi.md
- user/emulation/openmp.md
- user/emulation/events.md
- CHANGELOG.md
- 'Developer guide':

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "openmp_priv.h"
@ -14,53 +14,83 @@
enum { PUSH = 1, POP = 2, IGN = 3 };
static const int fn_table[256][256][3] = {
['A'] = {
['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED },
[']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED },
},
['B'] = {
['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER },
['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER },
['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER },
['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER },
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER },
['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER },
['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT },
['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT },
['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN },
['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER_PLAIN },
['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN },
['J'] = { CH_SUBSYSTEM, POP, ST_BARRIER_JOIN },
['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK },
['F'] = { CH_SUBSYSTEM, POP, ST_BARRIER_FORK },
['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK },
['T'] = { CH_SUBSYSTEM, POP, ST_BARRIER_TASK },
['s'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
['S'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
},
['I'] = {
['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ },
['A'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_ACQ },
['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL },
['R'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_REL },
['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION },
[']'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_SECTION },
},
['W'] = {
['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC },
['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC },
['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT },
['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT },
['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK },
['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK },
['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE },
['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE },
['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE },
['D'] = { CH_SUBSYSTEM, POP, ST_WD_DISTRIBUTE },
['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK },
['C'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_CHUNK },
['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT },
['Y'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_INIT },
['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC },
['S'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_STATIC },
['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION },
['E'] = { CH_SUBSYSTEM, POP, ST_WD_SECTION },
['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE },
['I'] = { CH_SUBSYSTEM, POP, ST_WD_SINGLE },
},
['T'] = {
['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS },
['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS },
['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS },
['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS },
['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK },
[']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK },
['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 },
['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 },
['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC },
['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT },
['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT },
['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD },
['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD },
['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS },
['C'] = { CH_SUBSYSTEM, POP, ST_TASK_CHECK_DEPS },
['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC },
['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS },
['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS },
['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP },
['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP },
['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS },
['R'] = { CH_SUBSYSTEM, POP, ST_TASK_RELEASE_DEPS },
['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN },
[']'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN },
['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 },
['I'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN_IF0 },
['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP },
['G'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKGROUP },
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT },
['T'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT },
['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS },
['W'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT_DEPS },
['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD },
['Y'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKYIELD },
},
['A'] = {
['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED },
[']'] = { CH_SUBSYSTEM, POP, ST_RT_ATTACHED },
},
['M'] = {
['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL },
['I'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_INTERNAL },
['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER },
['U'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_USER },
},
['H'] = {
['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP },
[']'] = { CH_SUBSYSTEM, POP, ST_RT_WORKER_LOOP },
},
['C'] = {
['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT },
['I'] = { CH_SUBSYSTEM, POP, ST_RT_INIT },
['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL },
['F'] = { CH_SUBSYSTEM, POP, ST_RT_FORK_CALL },
},
};

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef OPENMP_PRIV_H
@ -15,28 +15,42 @@ enum openmp_chan {
CH_MAX,
};
enum openmp_function_values {
ST_ATTACHED = 1,
ST_JOIN_BARRIER,
ST_BARRIER,
ST_TASKING_BARRIER,
ST_SPIN_WAIT,
ST_FOR_STATIC,
ST_FOR_DYNAMIC_INIT,
ST_FOR_DYNAMIC_CHUNK,
ST_SINGLE,
ST_RELEASE_DEPS,
ST_TASKWAIT_DEPS,
ST_INVOKE_TASK,
ST_INVOKE_TASK_IF0,
ST_BARRIER_FORK = 1,
ST_BARRIER_JOIN,
ST_BARRIER_PLAIN,
ST_BARRIER_SPIN_WAIT,
ST_BARRIER_TASK,
/* Critical */
ST_CRITICAL_ACQ,
ST_CRITICAL_REL,
ST_CRITICAL_SECTION,
/* Work-distribution */
ST_WD_DISTRIBUTE,
ST_WD_FOR_DYNAMIC_CHUNK,
ST_WD_FOR_DYNAMIC_INIT,
ST_WD_FOR_STATIC,
ST_WD_SECTION,
ST_WD_SINGLE,
/* Task */
ST_TASK_ALLOC,
ST_TASK_SCHEDULE,
ST_TASKWAIT,
ST_TASKYIELD,
ST_TASK_CHECK_DEPS,
ST_TASK_DUP_ALLOC,
ST_CHECK_DEPS,
ST_TASKGROUP,
ST_TASK_RELEASE_DEPS,
ST_TASK_RUN,
ST_TASK_RUN_IF0,
ST_TASK_SCHEDULE,
ST_TASK_TASKGROUP,
ST_TASK_TASKWAIT,
ST_TASK_TASKWAIT_DEPS,
ST_TASK_TASKYIELD,
/* Runtime */
ST_RT_ATTACHED,
ST_RT_FORK_CALL,
ST_RT_INIT,
ST_RT_MICROTASK_INTERNAL,
ST_RT_MICROTASK_USER,
ST_RT_WORKER_LOOP,
};
struct openmp_thread {

View File

@ -26,29 +26,44 @@ static const char model_name[] = "openmp";
enum { model_id = 'P' };
static struct ev_decl model_evlist[] = {
PAIR_E("PA[", "PA]", "the attached state")
PAIR_B("PBb", "PBB", "plain barrier")
PAIR_B("PBj", "PBJ", "join barrier")
PAIR_B("PBf", "PBF", "fork barrier")
PAIR_B("PBt", "PBT", "tasking barrier")
PAIR_B("PBs", "PBS", "spin wait")
PAIR_E("PBj", "PBJ", "a join barrier")
PAIR_E("PBb", "PBB", "a barrier")
PAIR_E("PBt", "PBT", "a tasking barrier")
PAIR_E("PBs", "PBS", "a spin wait")
PAIR_B("PIa", "PIA", "critical acquiring")
PAIR_B("PIr", "PIR", "critical releasing")
PAIR_B("PI[", "PI]", "critical section")
PAIR_B("PWs", "PWS", "static for")
PAIR_B("PWd", "PWD", "dynamic for init")
PAIR_B("PWd", "PWD", "distribute")
PAIR_B("PWy", "PWY", "dynamic for init")
PAIR_B("PWc", "PWC", "dynamic for chunk")
PAIR_B("PWs", "PWS", "static for")
PAIR_B("PWe", "PWE", "section")
PAIR_B("PWi", "PWI", "single")
PAIR_B("PTr", "PTR", "releasing task dependencies")
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
PAIR_B("PT[", "PT]", "invoking a task")
PAIR_B("PTi", "PTI", "invoking an if0 task")
PAIR_B("PTa", "PTA", "task allocation")
PAIR_B("PTs", "PTS", "scheduling a task")
PAIR_E("PTt", "PTT", "a taskwait")
PAIR_E("PTy", "PTY", "a taskyield")
PAIR_B("PTd", "PTD", "duplicating a task")
PAIR_B("PTc", "PTC", "checking task dependencies")
PAIR_E("PTg", "PTG", "a taskgroup")
PAIR_B("PTd", "PTD", "duplicating a task")
PAIR_B("PTr", "PTR", "releasing task dependencies")
PAIR_B("PT[", "PT]", "running a task")
PAIR_B("PTi", "PTI", "running an if0 task")
PAIR_B("PTs", "PTS", "scheduling a task")
PAIR_B("PTg", "PTG", "a taskgroup")
PAIR_B("PTt", "PTT", "a taskwait")
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
PAIR_B("PTy", "PTY", "a taskyield")
PAIR_E("PA[", "PA]", "the attached state")
PAIR_B("PMi", "PMI", "microtask internal")
PAIR_B("PMu", "PMU", "microtask user code")
PAIR_B("PH[", "PH]", "worker loop")
PAIR_B("PCf", "PCF", "fork call")
PAIR_B("PCi", "PCI", "initialization")
{ NULL, NULL },
};
@ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = {
[CH_SUBSYSTEM] = 1,
};
static const int chan_dup[CH_MAX] = {
[CH_SUBSYSTEM] = 1,
};
/* ----------------- pvt ------------------ */
static const int pvt_type[CH_MAX] = {
@ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = {
};
static const struct pcf_value_label openmp_subsystem_values[] = {
{ ST_ATTACHED, "Attached" },
{ ST_JOIN_BARRIER, "Join barrier" },
{ ST_BARRIER, "Barrier" },
{ ST_TASKING_BARRIER, "Tasking barrier" },
{ ST_SPIN_WAIT, "Spin wait" },
{ ST_FOR_STATIC, "For static" },
{ ST_FOR_DYNAMIC_INIT, "For dynamic init" },
{ ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" },
{ ST_SINGLE, "Single" },
{ ST_RELEASE_DEPS, "Release deps" },
{ ST_TASKWAIT_DEPS, "Taskwait deps" },
{ ST_INVOKE_TASK, "Invoke task" },
{ ST_INVOKE_TASK_IF0, "Invoke task if0" },
{ ST_TASK_ALLOC, "Task alloc" },
{ ST_TASK_SCHEDULE, "Task schedule" },
{ ST_TASKWAIT, "Taskwait" },
{ ST_TASKYIELD, "Taskyield" },
{ ST_TASK_DUP_ALLOC, "Task dup alloc" },
{ ST_CHECK_DEPS, "Check deps" },
{ ST_TASKGROUP, "Taskgroup" },
/* Work-distribution */
{ ST_WD_DISTRIBUTE, "Work-distribution: Distribute" },
{ ST_WD_FOR_DYNAMIC_CHUNK, "Work-distribution: Dynamic for chunk" },
{ ST_WD_FOR_DYNAMIC_INIT, "Work-distribution: Dynamic for initialization" },
{ ST_WD_FOR_STATIC, "Work-distribution: Static for chunk" },
{ ST_WD_SECTION, "Work-distribution: Section" },
{ ST_WD_SINGLE, "Work-distribution: Single" },
/* Task */
{ ST_TASK_ALLOC, "Task: Allocation" },
{ ST_TASK_CHECK_DEPS, "Task: Check deps" },
{ ST_TASK_DUP_ALLOC, "Task: Duplicating" },
{ ST_TASK_RELEASE_DEPS, "Task: Releasing deps" },
{ ST_TASK_RUN, "Task: Running task" },
{ ST_TASK_RUN_IF0, "Task: Running task if0" },
{ ST_TASK_SCHEDULE, "Task: Scheduling" },
{ ST_TASK_TASKGROUP, "Task: Taskgroup" },
{ ST_TASK_TASKWAIT, "Task: Taskwait" },
{ ST_TASK_TASKWAIT_DEPS, "Task: Taskwait deps" },
{ ST_TASK_TASKYIELD, "Task: Taskyield" },
/* Critical */
{ ST_CRITICAL_ACQ, "Critical: Acquiring" },
{ ST_CRITICAL_REL, "Critical: Releasing" },
{ ST_CRITICAL_SECTION, "Critical: Section" },
/* Barrier */
{ ST_BARRIER_FORK, "Barrier: Fork" },
{ ST_BARRIER_JOIN, "Barrier: Join" },
{ ST_BARRIER_PLAIN, "Barrier: Plain" },
{ ST_BARRIER_TASK, "Barrier: Task" },
{ ST_BARRIER_SPIN_WAIT, "Barrier: Spin wait" },
/* Runtime */
{ ST_RT_ATTACHED, "Runtime: Attached" },
{ ST_RT_FORK_CALL, "Runtime: Fork call" },
{ ST_RT_INIT, "Runtime: Initialization" },
{ ST_RT_MICROTASK_INTERNAL, "Runtime: Internal microtask" },
{ ST_RT_MICROTASK_USER, "Runtime: User microtask" },
{ ST_RT_WORKER_LOOP, "Runtime: Worker main loop" },
{ -1, NULL },
};
@ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = {
};
static const long prv_flags[CH_MAX] = {
[CH_SUBSYSTEM] = PRV_SKIPDUP,
[CH_SUBSYSTEM] = PRV_EMITDUP,
};
static const struct model_pvt_spec pvt_spec = {
@ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = {
/* ----------------- tracking ------------------ */
static const int th_track[CH_MAX] = {
[CH_SUBSYSTEM] = TRACK_TH_RUN,
[CH_SUBSYSTEM] = TRACK_TH_ACT,
};
static const int cpu_track[CH_MAX] = {
@ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = {
.prefix = model_name,
.ch_names = chan_name,
.ch_stack = chan_stack,
.ch_dup = chan_dup,
.pvt = &pvt_spec,
.track = th_track,
};
@ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = {
.prefix = model_name,
.ch_names = chan_name,
.ch_stack = chan_stack,
.ch_dup = chan_dup,
.pvt = &pvt_spec,
.track = cpu_track,
};

View File

@ -4,3 +4,4 @@
add_subdirectory(nanos6)
add_subdirectory(nodes)
add_subdirectory(nosv)
add_subdirectory(openmp)

View File

@ -0,0 +1,50 @@
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
# SPDX-License-Identifier: GPL-3.0-or-later
check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND)
check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND)
cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH)
if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND)
if(ENABLE_ALL_TESTS)
message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests")
else()
message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests")
endif()
return()
endif()
function(openmp_rt_test)
ovni_test(${ARGN})
target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv"
"-no-pedantic")
target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv")
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m")
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
ENVIRONMENT "OMP_OVNI=1")
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni")
endfunction()
openmp_rt_test(barrier-explicit.c)
openmp_rt_test(critical.c)
openmp_rt_test(if0-nested-task.c)
openmp_rt_test(if0.c)
openmp_rt_test(multi-parallels.c)
openmp_rt_test(parallel-for.c)
openmp_rt_test(parallel-loop.c)
openmp_rt_test(parallel-nested.c)
openmp_rt_test(parallel-task.c)
openmp_rt_test(sections.c)
openmp_rt_test(simple-task.c)
openmp_rt_test(task.c)
openmp_rt_test(taskloop.c)
openmp_rt_test(taskwait.c)
openmp_rt_test(team-distribute.c)
openmp_rt_test(worksharing-and-tasks.c)
openmp_rt_test(worksharing-mix.c)
openmp_rt_test(worksharing-task.c)
openmp_rt_test(worksharing.c)
openmp_rt_test(worksharing01.c)
openmp_rt_test(worksharing02.c)
openmp_rt_test(worksharing03.c)

View File

@ -0,0 +1,47 @@
#include <stdio.h>
#include <math.h>
#include "compat.h"
#define N 100
static void
dummy_work(double *x, int i)
{
sleep_us(i);
x[i] += sqrt((double) i);
}
int main(void)
{
double x[N] = { 0 };
#pragma omp parallel
{
#pragma omp single
{
for (int i = 0; i < N; i++) {
#pragma omp task shared(x)
dummy_work(x, i);
}
}
sleep_us(200);
#pragma omp barrier
sleep_us(1000);
#pragma omp barrier
#pragma omp single
{
for (int i = 0; i < N; i++) {
#pragma omp task shared(x)
dummy_work(x, i);
}
}
}
double sum = 0.0;
for (int i = 0; i < N; i++)
sum += x[i];
printf("sum = %e\n", sum);
return 0;
}

test/rt/openmp/critical.c (new file)
View File

@ -0,0 +1,16 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
sleep_us(1000);
#pragma omp critical
sleep_us(200);
sleep_us(1000);
}
return 0;
}

View File

@ -0,0 +1,20 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task if(0)
{
#pragma omp task
{
sleep_us(1000);
}
#pragma omp taskwait
}
}
return 0;
}

test/rt/openmp/if0.c (new file)
View File

@ -0,0 +1,17 @@
#include <omp.h>
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task if(0)
{
sleep_us(1000);
}
}
return 0;
}

View File

@ -0,0 +1,13 @@
#include "compat.h"
int main(void)
{
for (int i = 0; i < 10; i++) {
#pragma omp parallel
{
sleep_us(1000);
}
}
return 0;
}

View File

@ -0,0 +1,29 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
}
return 0;
}

View File

@ -0,0 +1,14 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp loop
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
}
return 0;
}

View File

@ -0,0 +1,22 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
#pragma omp parallel
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
}
}
return 0;
}

View File

@ -0,0 +1,26 @@
#include "compat.h"
static void foo(void)
{
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++)
sleep_us(i);
#pragma omp single
for (int i = 0; i < 100; i++)
{
#pragma omp task
sleep_us(10);
}
}
int main(void)
{
#pragma omp parallel
{
foo();
foo();
}
return 0;
}

test/rt/openmp/sections.c (new file)
View File

@ -0,0 +1,51 @@
#include <stdio.h>
#include "compat.h"
int main(void)
{
#pragma omp parallel sections
{
#pragma omp section
{ sleep_us(1001); printf("1001\n"); }
#pragma omp section
{ sleep_us(1002); printf("1002\n"); }
#pragma omp section
{ sleep_us(1003); printf("1003\n"); }
#pragma omp section
{ sleep_us(1004); printf("1004\n"); }
#pragma omp section
sleep_us(1005);
#pragma omp section
sleep_us(1006);
#pragma omp section
sleep_us(1007);
#pragma omp section
sleep_us(1008);
#pragma omp section
sleep_us(1009);
#pragma omp section
sleep_us(1010);
#pragma omp section
sleep_us(1011);
#pragma omp section
sleep_us(1012);
#pragma omp section
sleep_us(1013);
#pragma omp section
sleep_us(1014);
#pragma omp section
sleep_us(1015);
#pragma omp section
sleep_us(1016);
#pragma omp section
sleep_us(1017);
#pragma omp section
sleep_us(1018);
#pragma omp section
sleep_us(1019);
#pragma omp section
sleep_us(1020);
}
return 0;
}

View File

@ -0,0 +1,23 @@
#include "compat.h"
int main(void)
{
int a;
int *p = &a;
#pragma omp parallel
#pragma omp single
{
#pragma omp task depend(out : p[0])
{
sleep_us(1000);
}
for (int i = 0; i < 10000; i++)
{
#pragma omp task depend(in : p[0])
sleep_us(1);
}
}
return 0;
}

test/rt/openmp/task.c (new file)
View File

@ -0,0 +1,19 @@
#include <stdio.h>
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
for (int i = 0; i < 10; i++) {
#pragma omp task
{
printf("%d\n", i);
sleep_us(100);
}
}
#pragma omp barrier
}
return 0;
}

test/rt/openmp/taskloop.c (new file)
View File

@ -0,0 +1,17 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp taskloop
for (int i = 0; i < 10000; i++)
{
#pragma omp task
sleep_us(1);
}
}
return 0;
}

test/rt/openmp/taskwait.c (new file)
View File

@ -0,0 +1,42 @@
#include "compat.h"
#include <stdio.h>
int main(void)
{
#pragma omp parallel
#pragma omp single
{
#pragma omp task label("A")
{
sleep_us(5000);
printf("A\n");
}
#pragma omp task label("B")
{
#pragma omp task label("B1")
{
sleep_us(2000);
printf("B1\n");
}
/* Shouldn't wait for task A */
#pragma omp taskwait
#pragma omp task
{
sleep_us(1000);
printf("B2\n");
}
}
#pragma omp task label("C")
{
printf("C\n");
}
}
/* Expected output C B1 B2 A */
return 0;
}

View File

@ -0,0 +1,14 @@
#include <omp.h>
#include "compat.h"
int main(void)
{
#pragma omp teams num_teams(2)
{
#pragma omp distribute parallel for
for (volatile int i = 0; i < 1000; i++)
sleep_us(100 + i);
}
return 0;
}

View File

@ -0,0 +1,34 @@
#include <stdio.h>
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
//#pragma omp single nowait
for (int i = 0; i < 100; i++) {
#pragma omp task
sleep_us(10);
}
/* Wait a bit for task allocation */
sleep_us(1000);
/* Occupy 4 CPUs with sections */
#pragma omp sections nowait
{
#pragma omp section
{ sleep_us(1001); printf("1001\n"); }
#pragma omp section
{ sleep_us(1002); printf("1002\n"); }
#pragma omp section
{ sleep_us(1003); printf("1003\n"); }
#pragma omp section
{ sleep_us(1004); printf("1004\n"); }
}
#pragma omp taskwait
}
return 0;
}

View File

@ -0,0 +1,66 @@
#include <omp.h>
#include <stdio.h>
#include "compat.h"
/* Test several work-distribution and task constructs, so we can generate a
* trace that includes most of the states. */
int main(void)
{
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp sections
{
#pragma omp section
{ sleep_us(101); printf("101\n"); }
#pragma omp section
{ sleep_us(102); printf("102\n"); }
#pragma omp section
{ sleep_us(103); printf("103\n"); }
#pragma omp section
{ sleep_us(104); printf("104\n"); }
}
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp single
for (int i = 0; i < 100; i++)
{
#pragma omp task
sleep_us(10);
}
}
#pragma omp parallel
{
#pragma omp critical
sleep_us(20);
#pragma omp barrier
#pragma omp for
for (int i = 0; i < 100; i++) {
sleep_us(1);
}
#pragma omp for schedule(dynamic, 1)
for (int i = 0; i < 100; i++) {
sleep_us(i);
}
}
// FIXME: Crashes OpenMP-V runtime
//#pragma omp distribute parallel for
//for (int i = 0; i < 1000; i++) {
// sleep_us(1);
//}
return 0;
}

View File

@ -0,0 +1,25 @@
#include <omp.h>
#include "compat.h"
static void foo(void)
{
#pragma omp for
for (int i = 0; i < 100; ++i)
{
#pragma omp task
{
sleep_us(1);
}
}
}
int main(void)
{
#pragma omp parallel
{
foo();
foo();
}
return 0;
}

View File

@ -0,0 +1,16 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
{
#pragma omp for schedule(dynamic) ordered label("omp for dynamic")
for (int i = 0; i < 100; i++)
sleep_us(100);
#pragma omp single label("single")
sleep_us(1000);
}
return 0;
}

View File

@ -0,0 +1,11 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp for schedule(static, 30)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}

View File

@ -0,0 +1,12 @@
#include "compat.h"
int main(void)
{
#pragma omp target
#pragma omp teams num_teams(1)
#pragma omp distribute dist_schedule(static, 30)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}

View File

@ -0,0 +1,11 @@
#include "compat.h"
int main(void)
{
#pragma omp parallel
#pragma omp for schedule(dynamic)
for (int i = 0; i < 100; i++)
sleep_us(10);
return 0;
}