diff --git a/CHANGELOG.md b/CHANGELOG.md
index 97e3d24..b225e58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+### Added
+
+- Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems
+ and only works with the OpenMP-V runtime, on top of nOS-V).
+
### Changed
- Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}.
diff --git a/cfg/cpu/openmp/subsystem.cfg b/cfg/cpu/openmp/subsystem.cfg
index 21803d4..df3ae5c 100644
--- a/cfg/cpu/openmp/subsystem.cfg
+++ b/cfg/cpu/openmp/subsystem.cfg
@@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
+window_custom_color_enabled true
+window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
diff --git a/cfg/thread/openmp/subsystem.cfg b/cfg/thread/openmp/subsystem.cfg
index 849ac61..917d077 100644
--- a/cfg/thread/openmp/subsystem.cfg
+++ b/cfg/thread/openmp/subsystem.cfg
@@ -4,9 +4,9 @@ ConfigFile.NumWindows: 1
################################################################################
-< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread >
+< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread >
################################################################################
-window_name Thread: OpenMP subsystem of the RUNNING thread
+window_name Thread: OpenMP subsystem of the ACTIVE thread
window_type single
window_id 1
window_position_x 0
@@ -16,6 +16,8 @@ window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
+window_custom_color_enabled true
+window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
@@ -38,5 +40,5 @@ window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 50
-window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread"
+window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread"
diff --git a/doc/user/emulation/events.md b/doc/user/emulation/events.md
index c2521d0..02f0456 100644
--- a/doc/user/emulation/events.md
+++ b/doc/user/emulation/events.md
@@ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
-PA[
-- enters the attached state
-PA]
-- leaves the attached state
-PBj
-- enters a join barrier
-PBJ
-- leaves a join barrier
PBb
-- enters a barrier
+- begins plain barrier
PBB
-- leaves a barrier
+- ceases plain barrier
+PBj
+- begins join barrier
+PBJ
+- ceases join barrier
+PBf
+- begins fork barrier
+PBF
+- ceases fork barrier
PBt
-- enters a tasking barrier
+- begins tasking barrier
PBT
-- leaves a tasking barrier
+- ceases tasking barrier
PBs
-- enters a spin wait
+- begins spin wait
PBS
-- leaves a spin wait
-PWs
-- begins static for
-PWS
-- ceases static for
+- ceases spin wait
+PIa
+- begins critical acquiring
+PIA
+- ceases critical acquiring
+PIr
+- begins critical releasing
+PIR
+- ceases critical releasing
+PI[
+- begins critical section
+PI]
+- ceases critical section
PWd
-- begins dynamic for init
+- begins distribute
PWD
+- ceases distribute
+PWy
+- begins dynamic for init
+PWY
- ceases dynamic for init
PWc
- begins dynamic for chunk
PWC
- ceases dynamic for chunk
+PWs
+- begins static for
+PWS
+- ceases static for
+PWe
+- begins section
+PWE
+- ceases section
PWi
- begins single
PWI
- ceases single
-PTr
-- begins releasing task dependencies
-PTR
-- ceases releasing task dependencies
-PTw
-- begins waiting for taskwait dependencies
-PTW
-- ceases waiting for taskwait dependencies
-PT[
-- begins invoking a task
-PT]
-- ceases invoking a task
-PTi
-- begins invoking an if0 task
-PTI
-- ceases invoking an if0 task
PTa
- begins task allocation
PTA
- ceases task allocation
-PTs
-- begins scheduling a task
-PTS
-- ceases scheduling a task
-PTt
-- enters a taskwait
-PTT
-- leaves a taskwait
-PTy
-- enters a taskyield
-PTY
-- leaves a taskyield
-PTd
-- begins duplicating a task
-PTD
-- ceases duplicating a task
PTc
- begins checking task dependencies
PTC
- ceases checking task dependencies
+PTd
+- begins duplicating a task
+PTD
+- ceases duplicating a task
+PTr
+- begins releasing task dependencies
+PTR
+- ceases releasing task dependencies
+PT[
+- begins running a task
+PT]
+- ceases running a task
+PTi
+- begins running an if0 task
+PTI
+- ceases running an if0 task
+PTs
+- begins scheduling a task
+PTS
+- ceases scheduling a task
PTg
-- enters a taskgroup
+- begins a taskgroup
PTG
-- leaves a taskgroup
+- ceases a taskgroup
+PTt
+- begins a taskwait
+PTT
+- ceases a taskwait
+PTw
+- begins waiting for taskwait dependencies
+PTW
+- ceases waiting for taskwait dependencies
+PTy
+- begins a taskyield
+PTY
+- ceases a taskyield
+PA[
+- enters the attached state
+PA]
+- leaves the attached state
+PMi
+- begins microtask internal
+PMI
+- ceases microtask internal
+PMu
+- begins microtask user code
+PMU
+- ceases microtask user code
+PH[
+- begins worker loop
+PH]
+- ceases worker loop
+PCf
+- begins fork call
+PCF
+- ceases fork call
+PCi
+- begins initialization
+PCI
+- ceases initialization
## Model tampi
diff --git a/doc/user/emulation/fig/openmp-subsystem.png b/doc/user/emulation/fig/openmp-subsystem.png
new file mode 100644
index 0000000..abfc71b
Binary files /dev/null and b/doc/user/emulation/fig/openmp-subsystem.png differ
diff --git a/doc/user/emulation/openmp.md b/doc/user/emulation/openmp.md
index 4806c3b..d176eb7 100644
--- a/doc/user/emulation/openmp.md
+++ b/doc/user/emulation/openmp.md
@@ -1,164 +1,243 @@
-# OpenMP Model
+# OpenMP model
-The LLVM OpenMP Runtime is an integral component of the LLVM compiler
-infrastructure that provides support for the OpenMP (Open Multi-Processing)
-programming model.
+The [OpenMP programming model](https://www.openmp.org) is a widely used API and
+set of directives for parallel programming, allowing developers to write
+multi-threaded and multi-process applications more easily. In this document we
+refer to the
+[version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf).
-OpenMP is a widely used API and set of directives for parallel programming,
-allowing developers to write multi-threaded and multi-process applications more
-easily.
+The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides
+an implementation of the OpenMP specification as a component of the LLVM
+compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top
+of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the
+[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**.
-This documentation is about an OpenMP runtime built on top of [nOS-V][nosv],
-leveraging its thread management capabilities while retaining the fundamental
-characteristics of the original runtime.
+We have added instrumentation events to OpenMP-V designed to be enabled along
+the [nOS-V instrumentation](nosv.md). This document describes all the
+instrumentation features included in our modified OpenMP-V runtime to identify
+what is happening. This data is useful for both users and developers of the
+OpenMP runtime to analyze issues and undesired behaviors.
-While the modifications introduced to the runtime may appear to be minor, it's
-important to note that this enhanced version is not API compatible with the
-original runtime. As a result, it is mandatory to use the clang built in the same
-[LLVM Project][llvm].
+!!! Note
-This document describes all the instrumentation features included in the runtime
-by both nOS-V and OpenMP to monitor task execution and the execution flow within
-the runtime library to identify what is happening. This data is useful for both
-users and developers of the OpenMP runtime to analyze issues and undesired
-behaviors.
+ Instrumenting the original OpenMP runtime from the LLVM project is planned
+ but is not yet posible. For now you must use the modified OpenMP-V runtime
+ with nOS-V.
-[llvm]: https://pm.bsc.es/llvm-ompss
-[nosv]: https://gitlab.bsc.es/nos-v/nos-v
+## Enable the instrumentation
-## How to Generate Execution Traces
+To generate runtime traces, you will have to:
-In order to build the OpenMP runtime nOS-V must be provided by using
-`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a
-runtime without instrumentation. However, the user may be able to generate
-execution traces by enabling nOS-V instrumentation through
-`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a
-nOS-V installation built with ovni.
+1. **Build nOS-V with ovni support:** Refer to the
+ [nOS-V
+ documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md).
+ Typically you should use the `--with-ovni` option at configure time to specify
+ where ovni is installed.
+2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
+ environment variable to specify the nOS-V and ovni installation
+ when configuring CMake.
+3. **Enable the instrumentation in nOS-V at runtime:** Refer to the
+ [nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md)
+ to find out how to enable the tracing at runtime. Typically you can just set
+ `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
+4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment
+ variable `OMP_OVNI=1`.
-Building OpenMP with instrumentation requires to pass ovni pkg-config path to
-`PKG_CONFIG_PATH` with a nosv installation compiled with ovni too. The reason is
-because OpenMP is dependent of nOS-V to generate complete execution traces.
+Currently there is only support for the subsystem view, which is documented
+below. The view is complemented with the information of [nOS-V views](nosv.md),
+as OpenMP-V uses nOS-V tasks to run the workers.
-By default, OpenMP will not instrument anything. To enable instrumentation the
-user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
+## Subsystem view
-The following sections will describe the OpenMP execution trace views and what
-information is shown there.
-
-## nOS-V Task Type
-
-As said in the previous sections. This OpenMP runtime is built on top of nOS-V.
-So the user can explore what does the execution do there. Here we only describe
-the task type view. For other views please take a look at the nOS-V chapter.
-
-In OpenMP, every thread that is launched (main thread included) is shown in a task
-type with label "openmp". In a task application, every task call will be seen with
-a task type with label "file:line:col" format referring to the pragma location. This
-can be changed by using the clause label(string-literal).
-
-OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for
-more information. Nevertheless, the OpenMP task view shows it.
-
-## OpenMP Subsystem
+![Subsystem view example](fig/openmp-subsystem.png)
This view illustrates the activities of each thread with different states:
-- **Attached**: The thread is attached.
+- **Work-distribution subsystem**: Related to work-distribution constructs,
+ [in Chapter 11][workdis].
-- **Join barrier**: The thread is in the implicit barrier of the parallel region.
+ - **Distribute**: Running a *Distribute* region.
-- **Tasking barrier**: The thread is in the additional tasking barrier trying to
- execute tasks. This event happens if executed with KMP_TASKING=1.
+ - **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often
+ involve running more than one iteration of the loop. See the
+ [limitations](#dynamic_for) below.
-- **Spin wait**: The thread spin waits for a condition. Usually this event happens
- in a barrier while waiting for the other threads to reach the barrier. The thread
- also tries to execute tasks.
+ - **Dynamic for initialization**: Preparing a dynamic *for*.
-- **For static**: Executing a for static. The length of the event represents all the
- chunks of iterations executed by the thread. See "Limitations" section.
+ - **Static for chunk**: Executing the assigned iterations of an static
+ *for*.
-- **For dynamic init**: Running the initialization of an OpenMP for dynamic.
+ - **Single**: Running a *Single* region. All threads of the parallel region
+ participate.
-- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To
- clarify. If a thread executes two chunks of iterations, let's say from 1 to 4 and
- from 8 to 12, two different events will be shown. See "Limitations" section.
+ - **Section**: Running a *Section* region. All threads of the parallel region
+ participate.
-- **Single**: Running a Single region. All threads of the parallel region will emit
- the event.
+- **Task subsystem**: Related to tasking constructs, [in Chapter 12][tasking].
-- **Release deps**: When finishing a task, trying to release dependencies. This
- event happens although the task has no dependencies.
+ - **Allocation**: Allocating the task descriptor.
-- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled.
- This appears typically in a task if0 with dependencies or a taskwait with deps.
+ - **Check deps**: Checking if the task has pending dependencies to be
+ fulfilled. When all dependencies are fulfilled the task will be scheduled.
-- **Invoke task**: Executing a task.
+ - **Duplicating**: Duplicating the task descriptor in a taskloop.
-- **Invoke task if0**: Executing a task if0.
+ - **Releasing deps**: Releasing dependencies at the end of a task. This
+ state is always present even if the task has no dependencies.
-- **Task alloc**: Allocating the task descriptor.
+ - **Running task**: Executing a task.
-- **Task schedule**: Adding the task to the scheduler.
+ - **Running task if0**: Executing a task if0.
-- **Taskwait**: Running a taskwait.
+ - **Scheduling**: Adding the task to the scheduler for execution.
-- **Taskyield**: Running a taskyield.
+ - **Taskgroup**: Waiting in a *taskgroup* construct.
-- **Task dup alloc**: Duplicating the task descriptor in a taskloop.
+ - **Taskwait**: Waiting in a *taskwait* construct.
-- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This
- means that if all dependencies are fulfilled the task will be scheduled.
+ - **Taskwait deps**: Trying to execute tasks until dependencies have been
+ fulfilled. This appears typically in a task if0 with dependencies or a
+ taskwait with deps.
+
+ - **Taskyield**: Performing a *taskyield* construct.
-- **Taskgroup**: Running a taskgroup.
+- **Critical subsystem**: Related to the *critical* Constuct, in [Section 15.2][critical].
+
+ - **Acquiring**: Waiting to acquire a *Critical* section.
+
+ - **Section**: Running the *Critical* section.
+
+ - **Releasing**: Waiting to release a *Critical* section.
+
+- **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier].
+ **All barriers can try to execute tasks**.
+
+ - **Barrier: Fork**: Workers wait for a release signal from the master thread to
+ continue. The master can continue as soon as it signals the workers. It is
+ done at the beginning of a fork-join region.
+
+ - **Barrier: Join**: The master thread waits until all workers finish their work.
+ Workers can continue as soon as they signal the master. It is done at the
+ end of a fork-join region.
+
+ - **Barrier: Plain**: Performing a plain barrier, which waits for a release
+ signal from the master thread to continue. It is done at the beginning of
+ a fork-join region, in the `__kmp_join_barrier()` function.
+
+ - **Barrier: Task**: Blocked in an additional tasking barrier *until all previous
+ tasks have been executed*. Only happens when executed with `KMP_TASKING=1`.
+
+- **Runtime subsystem**: Internal operations of the runtime.
+
+ - **Attached**: Present after the call to `nosv_attach()` and before
+ `nosv_detach()`. This state is a hack.
+
+ - **Fork call**: Preparing a parallel section using the fork-join model.
+ Only called from the master thread.
+
+ - **Init**: Initializing the OpenMP-V runtime.
+
+ - **Internal microtask**: Running a internal OpenMP-V function as a microtask.
+
+ - **User microtask**: Running user code as a microtask in a worker thread.
+
+ - **Worker main Loop**: Running the main loop, where the workers run the
+ fork barrier, run a microtask and perform a join barrier until there is no
+ more work.
+
+!!! Note
+
+ The generated HTML version of the OpenMP 5.2 specification has some parts
+ missing, so we link directly to the PDF file which may not work in some
+ browsers.
+
+[workdis]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11
+[tasking]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12
+[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2
+[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3
## Limitations
-By the way how OpenMP is implemented. There are some instrumentation points that
-violate ovni subsystem rules. This mostly happens because some directives are lowered
-partially in the transformed user code, so it is not easy to wrap them into a
-Single-entry single-exit (SESE) region, like we would do with a regular task invocation,
-for example.
+As the compiler generates the code that perform the calls to the OpenMP-V
+runtime, there are some parts of the execution that are complicated to
+instrument by just placing a pair of events to delimite a function.
-All problematic directives are described here so the user is able to understand what
-is being show in the traces
+For those cases we use an approximation which is documented in the following
+subsections.
-- **Task if0**: The lowered user code of a task if0 is:
- ... = __kmpc_omp_task_alloc(...);
- __kmpc_omp_taskwait_deps_51(...); // If task has dependencies
- __kmpc_omp_task_begin_if0(...);
- // Call to the user code
- omp_task_entry_(...);
- __kmpc_omp_task_complete_if0(...);
+### Dynamic for
- Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As
- this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0`
- and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent.
+The generated code of a *dynamic for* has the following structure:
-- **For static**: The lowered user code of a for static is:
- // Parallel code
- __kmpc_for_static_init_4(...);
- for ( i = ...; i <= ...; ++i )
- ;
- __kmpc_for_static_fini(...);
+```c
+__kmpc_dispatch_init_4(...);
+while (__kmpc_dispatch_next_4(...)) {
+ for (i = ...; i <= ...; i++) {
+ // User code ...
+ }
+}
+```
- Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
- this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4`
- and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent.
+The function `__kmpc_dispatch_next_4()` returns `true` if there are more
+chunks (group of iterations) to be executed by the thread, otherwise it returns
+`false`.
-- **For dynamic**: The lowered user code of a for dynamic is:
+Ideally we want to instrument each chunk with a pair of begin and end events.
- __kmpc_dispatch_init_4(...);
- while ( __kmpc_dispatch_next_4(...))
- {
- for ( i = ...; i <= ...; ++i )
- ;
- }
+The problem with the instrumentation is that there is no easy way of determining
+if the call to `__kmpc_dispatch_next_4()` is processing the first chunk, just
+after `__kmpc_dispatch_init_4()`, or is coming from other chunks due to the
+while loop.
- Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
- this code is generated by the compiler the subsystem view shows:
- 1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init**
- 2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`.
- with the event **For dynamic chunk**.
- 3. How long it takes to run a loop iteration chunk between the last and the previous
- `__kmpc_dispatch_next_4` call with the event **For dynamic chunk**.
+Therefore, from the `__kmpc_dispatch_next_4()` alone, we cannot determine if we
+need to only emit a single "begin a new chunk" event or we need to emit the pair
+of events "finish the last chunk" and "begin a new one".
+So, as a workaround, we emit an event from the end of `__kmpc_dispatch_init_4()`
+starting a new chunk (which is fake), and then from `__kmpc_dispatch_next_4()` we
+always emit the "finish the last chunk" and "begin a new one" events (unless
+there are no more chunks, in which case we don't emit the "begin a new one"
+event).
+
+This will cause an spurious *Work-distribution: Dynamic for chunk* state at the
+beginning of each dynamic for, which should be very short and is not really a
+chunk.
+
+### Static for
+
+The generated code of an *static for* has the following structure:
+
+```c
+__kmpc_for_static_init_4(...);
+for (i = ...; i <= ...; i++) {
+ // User code ...
+}
+__kmpc_for_static_fini(...);
+```
+
+As this code is generated by the compiler we cannot easily add the begin/end
+pair of events to mark the *Work-distribution: Static for chunk* state.
+
+We assume that by placing the "begin processing a chunk" event at the end of
+`__kmpc_for_static_init_4()` and the "end processing the chunk" event at
+the beginning of `__kmpc_for_static_fini()` is equivalent to adding the
+events surrounding the for loop.
+
+### Task if0
+
+The generated code of an *if0 task* has the following structure:
+
+```c
+... = __kmpc_omp_task_alloc(...);
+__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
+__kmpc_omp_task_begin_if0(...);
+// Call to the user code
+omp_task_entry_(...);
+__kmpc_omp_task_complete_if0(...);
+```
+
+Instead of injecting the begin and end events in the user code, we
+approximate it by placing the "begin if0 task" event at the end of the
+`__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the
+beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as
+*Task: Running task if0*.
diff --git a/flake.lock b/flake.lock
index 8f50bc5..92bc150 100644
--- a/flake.lock
+++ b/flake.lock
@@ -7,11 +7,11 @@
]
},
"locked": {
- "lastModified": 1701968480,
- "narHash": "sha256-YoKN8FZllNQfpEpMqGOBv77kp9J0mlVRlhixWbcDqWg=",
+ "lastModified": 1705310446,
+ "narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=",
"ref": "refs/heads/master",
- "rev": "c4d5135fde108401417fdcdf5e1c8d11aeca4f32",
- "revCount": 931,
+ "rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f",
+ "revCount": 932,
"type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs"
},
diff --git a/flake.nix b/flake.nix
index 8a56f28..1df4c69 100644
--- a/flake.nix
+++ b/flake.nix
@@ -7,11 +7,15 @@
outputs = { self, nixpkgs, bscpkgs }:
let
+ # Set to true to replace all libovni in all runtimes with the current
+ # source. Causes large rebuilds on changes of ovni.
+ useLocalOvni = false;
+
ovniOverlay = final: prev: {
nosv = prev.nosv.override {
useGit = true;
gitBranch = "master";
- gitCommit = "6a63fd4378ba458243dda3159500c1450edf0e82";
+ gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b";
};
nanos6 = prev.nanos6.override {
useGit = true;
@@ -23,13 +27,27 @@
gitBranch = "master";
gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f";
};
+ clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
+ useGit = true;
+ gitBranch = "master";
+ gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886";
+ };
+
+ # Use a fixed commit for libovni
+ ovniFixed = prev.ovni.override {
+ useGit = true;
+ gitBranch = "master";
+ gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e";
+ };
# Build with the current source
- ovni = prev.ovni.overrideAttrs (old: rec {
+ ovniLocal = prev.ovni.overrideAttrs (old: rec {
pname = "ovni-local";
version = if self ? shortRev then self.shortRev else "dirty";
src = self;
cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ];
});
+ # Select correct ovni for libovni
+ ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed;
};
pkgs = import nixpkgs {
system = "x86_64-linux";
@@ -51,12 +69,12 @@
];
lib = pkgs.lib;
in {
- packages.x86_64-linux.ovniPackages = rec {
+ packages.x86_64-linux.ovniPackages = {
+ # Allow inspection of packages from the command line
+ inherit pkgs;
+ } // rec {
# Build with the current source
- local = pkgs.ovni.overrideAttrs (old: {
- pname = "ovni-local";
- src = self;
- });
+ local = pkgs.ovniLocal;
# Build in Debug mode
debug = local.overrideAttrs (old: {
@@ -97,12 +115,13 @@
# We need to be able to exit the chroot to run Nanos6 tests, as they
# require access to /sys for hwloc
__noChroot = true;
- buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes ]);
+ buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]);
cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ];
preConfigure = old.preConfigure or "" + ''
export NOSV_HOME="${pkgs.nosv}"
export NODES_HOME="${pkgs.nodes}"
export NANOS6_HOME="${pkgs.nanos6}"
+ export OPENMP_RUNTIME="libompv"
'';
});
diff --git a/mkdocs.yml b/mkdocs.yml
index 4319916..ed93df2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -37,6 +37,7 @@ nav:
- user/emulation/nanos6.md
- user/emulation/tampi.md
- user/emulation/mpi.md
+ - user/emulation/openmp.md
- user/emulation/events.md
- CHANGELOG.md
- 'Developer guide':
diff --git a/src/emu/openmp/event.c b/src/emu/openmp/event.c
index 1250041..445db7c 100644
--- a/src/emu/openmp/event.c
+++ b/src/emu/openmp/event.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
+/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "openmp_priv.h"
@@ -14,53 +14,83 @@
enum { PUSH = 1, POP = 2, IGN = 3 };
static const int fn_table[256][256][3] = {
- ['A'] = {
- ['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED },
- [']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED },
- },
['B'] = {
- ['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER },
- ['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER },
- ['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER },
- ['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER },
- ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER },
- ['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER },
- ['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT },
- ['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT },
+ ['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN },
+ ['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER_PLAIN },
+ ['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN },
+ ['J'] = { CH_SUBSYSTEM, POP, ST_BARRIER_JOIN },
+ ['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK },
+ ['F'] = { CH_SUBSYSTEM, POP, ST_BARRIER_FORK },
+ ['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK },
+ ['T'] = { CH_SUBSYSTEM, POP, ST_BARRIER_TASK },
+ ['s'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
+ ['S'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
+ },
+ ['I'] = {
+ ['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ },
+ ['A'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_ACQ },
+ ['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL },
+ ['R'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_REL },
+ ['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION },
+ [']'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_SECTION },
},
['W'] = {
- ['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC },
- ['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC },
- ['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT },
- ['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT },
- ['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK },
- ['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK },
- ['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE },
- ['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE },
+ ['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE },
+ ['D'] = { CH_SUBSYSTEM, POP, ST_WD_DISTRIBUTE },
+ ['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK },
+ ['C'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_CHUNK },
+ ['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT },
+ ['Y'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_INIT },
+ ['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC },
+ ['S'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_STATIC },
+ ['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION },
+ ['E'] = { CH_SUBSYSTEM, POP, ST_WD_SECTION },
+ ['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE },
+ ['I'] = { CH_SUBSYSTEM, POP, ST_WD_SINGLE },
},
['T'] = {
- ['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS },
- ['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS },
- ['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS },
- ['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS },
- ['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK },
- [']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK },
- ['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 },
- ['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 },
['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC },
- ['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
- ['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
- ['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
- ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT },
- ['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT },
- ['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD },
- ['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD },
+ ['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
+ ['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS },
+ ['C'] = { CH_SUBSYSTEM, POP, ST_TASK_CHECK_DEPS },
['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC },
- ['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
- ['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS },
- ['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS },
- ['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP },
- ['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP },
+ ['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
+ ['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS },
+ ['R'] = { CH_SUBSYSTEM, POP, ST_TASK_RELEASE_DEPS },
+ ['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN },
+ [']'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN },
+ ['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 },
+ ['I'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN_IF0 },
+ ['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
+ ['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
+ ['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP },
+ ['G'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKGROUP },
+ ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT },
+ ['T'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT },
+ ['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS },
+ ['W'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT_DEPS },
+ ['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD },
+ ['Y'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKYIELD },
+ },
+ ['A'] = {
+ ['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED },
+ [']'] = { CH_SUBSYSTEM, POP, ST_RT_ATTACHED },
+ },
+ ['M'] = {
+ ['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL },
+ ['I'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_INTERNAL },
+ ['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER },
+ ['U'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_USER },
+ },
+ ['H'] = {
+ ['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP },
+ [']'] = { CH_SUBSYSTEM, POP, ST_RT_WORKER_LOOP },
+ },
+ ['C'] = {
+ ['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT },
+ ['I'] = { CH_SUBSYSTEM, POP, ST_RT_INIT },
+ ['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL },
+ ['F'] = { CH_SUBSYSTEM, POP, ST_RT_FORK_CALL },
},
};
diff --git a/src/emu/openmp/openmp_priv.h b/src/emu/openmp/openmp_priv.h
index a5ee105..2893b32 100644
--- a/src/emu/openmp/openmp_priv.h
+++ b/src/emu/openmp/openmp_priv.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
+/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef OPENMP_PRIV_H
@@ -15,28 +15,42 @@ enum openmp_chan {
CH_MAX,
};
-
enum openmp_function_values {
- ST_ATTACHED = 1,
- ST_JOIN_BARRIER,
- ST_BARRIER,
- ST_TASKING_BARRIER,
- ST_SPIN_WAIT,
- ST_FOR_STATIC,
- ST_FOR_DYNAMIC_INIT,
- ST_FOR_DYNAMIC_CHUNK,
- ST_SINGLE,
- ST_RELEASE_DEPS,
- ST_TASKWAIT_DEPS,
- ST_INVOKE_TASK,
- ST_INVOKE_TASK_IF0,
+ ST_BARRIER_FORK = 1,
+ ST_BARRIER_JOIN,
+ ST_BARRIER_PLAIN,
+ ST_BARRIER_SPIN_WAIT,
+ ST_BARRIER_TASK,
+ /* Critical */
+ ST_CRITICAL_ACQ,
+ ST_CRITICAL_REL,
+ ST_CRITICAL_SECTION,
+ /* Work-distribution */
+ ST_WD_DISTRIBUTE,
+ ST_WD_FOR_DYNAMIC_CHUNK,
+ ST_WD_FOR_DYNAMIC_INIT,
+ ST_WD_FOR_STATIC,
+ ST_WD_SECTION,
+ ST_WD_SINGLE,
+ /* Task */
ST_TASK_ALLOC,
- ST_TASK_SCHEDULE,
- ST_TASKWAIT,
- ST_TASKYIELD,
+ ST_TASK_CHECK_DEPS,
ST_TASK_DUP_ALLOC,
- ST_CHECK_DEPS,
- ST_TASKGROUP,
+ ST_TASK_RELEASE_DEPS,
+ ST_TASK_RUN,
+ ST_TASK_RUN_IF0,
+ ST_TASK_SCHEDULE,
+ ST_TASK_TASKGROUP,
+ ST_TASK_TASKWAIT,
+ ST_TASK_TASKWAIT_DEPS,
+ ST_TASK_TASKYIELD,
+ /* Runtime */
+ ST_RT_ATTACHED,
+ ST_RT_FORK_CALL,
+ ST_RT_INIT,
+ ST_RT_MICROTASK_INTERNAL,
+ ST_RT_MICROTASK_USER,
+ ST_RT_WORKER_LOOP,
};
struct openmp_thread {
diff --git a/src/emu/openmp/setup.c b/src/emu/openmp/setup.c
index ddad033..4af1610 100644
--- a/src/emu/openmp/setup.c
+++ b/src/emu/openmp/setup.c
@@ -26,29 +26,44 @@ static const char model_name[] = "openmp";
enum { model_id = 'P' };
static struct ev_decl model_evlist[] = {
- PAIR_E("PA[", "PA]", "the attached state")
+ PAIR_B("PBb", "PBB", "plain barrier")
+ PAIR_B("PBj", "PBJ", "join barrier")
+ PAIR_B("PBf", "PBF", "fork barrier")
+ PAIR_B("PBt", "PBT", "tasking barrier")
+ PAIR_B("PBs", "PBS", "spin wait")
- PAIR_E("PBj", "PBJ", "a join barrier")
- PAIR_E("PBb", "PBB", "a barrier")
- PAIR_E("PBt", "PBT", "a tasking barrier")
- PAIR_E("PBs", "PBS", "a spin wait")
+ PAIR_B("PIa", "PIA", "critical acquiring")
+ PAIR_B("PIr", "PIR", "critical releasing")
+ PAIR_B("PI[", "PI]", "critical section")
- PAIR_B("PWs", "PWS", "static for")
- PAIR_B("PWd", "PWD", "dynamic for init")
+ PAIR_B("PWd", "PWD", "distribute")
+ PAIR_B("PWy", "PWY", "dynamic for init")
PAIR_B("PWc", "PWC", "dynamic for chunk")
+ PAIR_B("PWs", "PWS", "static for")
+ PAIR_B("PWe", "PWE", "section")
PAIR_B("PWi", "PWI", "single")
- PAIR_B("PTr", "PTR", "releasing task dependencies")
- PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
- PAIR_B("PT[", "PT]", "invoking a task")
- PAIR_B("PTi", "PTI", "invoking an if0 task")
PAIR_B("PTa", "PTA", "task allocation")
- PAIR_B("PTs", "PTS", "scheduling a task")
- PAIR_E("PTt", "PTT", "a taskwait")
- PAIR_E("PTy", "PTY", "a taskyield")
- PAIR_B("PTd", "PTD", "duplicating a task")
PAIR_B("PTc", "PTC", "checking task dependencies")
- PAIR_E("PTg", "PTG", "a taskgroup")
+ PAIR_B("PTd", "PTD", "duplicating a task")
+ PAIR_B("PTr", "PTR", "releasing task dependencies")
+ PAIR_B("PT[", "PT]", "running a task")
+ PAIR_B("PTi", "PTI", "running an if0 task")
+ PAIR_B("PTs", "PTS", "scheduling a task")
+ PAIR_B("PTg", "PTG", "a taskgroup")
+ PAIR_B("PTt", "PTT", "a taskwait")
+ PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
+ PAIR_B("PTy", "PTY", "a taskyield")
+
+ PAIR_E("PA[", "PA]", "the attached state")
+
+ PAIR_B("PMi", "PMI", "microtask internal")
+ PAIR_B("PMu", "PMU", "microtask user code")
+
+ PAIR_B("PH[", "PH]", "worker loop")
+
+ PAIR_B("PCf", "PCF", "fork call")
+ PAIR_B("PCi", "PCI", "initialization")
{ NULL, NULL },
};
@@ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = {
[CH_SUBSYSTEM] = 1,
};
+static const int chan_dup[CH_MAX] = {
+ [CH_SUBSYSTEM] = 1,
+};
+
/* ----------------- pvt ------------------ */
static const int pvt_type[CH_MAX] = {
@@ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = {
};
static const struct pcf_value_label openmp_subsystem_values[] = {
- { ST_ATTACHED, "Attached" },
- { ST_JOIN_BARRIER, "Join barrier" },
- { ST_BARRIER, "Barrier" },
- { ST_TASKING_BARRIER, "Tasking barrier" },
- { ST_SPIN_WAIT, "Spin wait" },
- { ST_FOR_STATIC, "For static" },
- { ST_FOR_DYNAMIC_INIT, "For dynamic init" },
- { ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" },
- { ST_SINGLE, "Single" },
- { ST_RELEASE_DEPS, "Release deps" },
- { ST_TASKWAIT_DEPS, "Taskwait deps" },
- { ST_INVOKE_TASK, "Invoke task" },
- { ST_INVOKE_TASK_IF0, "Invoke task if0" },
- { ST_TASK_ALLOC, "Task alloc" },
- { ST_TASK_SCHEDULE, "Task schedule" },
- { ST_TASKWAIT, "Taskwait" },
- { ST_TASKYIELD, "Taskyield" },
- { ST_TASK_DUP_ALLOC, "Task dup alloc" },
- { ST_CHECK_DEPS, "Check deps" },
- { ST_TASKGROUP, "Taskgroup" },
+ /* Work-distribution */
+ { ST_WD_DISTRIBUTE, "Work-distribution: Distribute" },
+ { ST_WD_FOR_DYNAMIC_CHUNK, "Work-distribution: Dynamic for chunk" },
+ { ST_WD_FOR_DYNAMIC_INIT, "Work-distribution: Dynamic for initialization" },
+ { ST_WD_FOR_STATIC, "Work-distribution: Static for chunk" },
+ { ST_WD_SECTION, "Work-distribution: Section" },
+ { ST_WD_SINGLE, "Work-distribution: Single" },
+ /* Task */
+ { ST_TASK_ALLOC, "Task: Allocation" },
+ { ST_TASK_CHECK_DEPS, "Task: Check deps" },
+ { ST_TASK_DUP_ALLOC, "Task: Duplicating" },
+ { ST_TASK_RELEASE_DEPS, "Task: Releasing deps" },
+ { ST_TASK_RUN, "Task: Running task" },
+ { ST_TASK_RUN_IF0, "Task: Running task if0" },
+ { ST_TASK_SCHEDULE, "Task: Scheduling" },
+ { ST_TASK_TASKGROUP, "Task: Taskgroup" },
+ { ST_TASK_TASKWAIT, "Task: Taskwait" },
+ { ST_TASK_TASKWAIT_DEPS, "Task: Taskwait deps" },
+ { ST_TASK_TASKYIELD, "Task: Taskyield" },
+ /* Critical */
+ { ST_CRITICAL_ACQ, "Critical: Acquiring" },
+ { ST_CRITICAL_REL, "Critical: Releasing" },
+ { ST_CRITICAL_SECTION, "Critical: Section" },
+ /* Barrier */
+ { ST_BARRIER_FORK, "Barrier: Fork" },
+ { ST_BARRIER_JOIN, "Barrier: Join" },
+ { ST_BARRIER_PLAIN, "Barrier: Plain" },
+ { ST_BARRIER_TASK, "Barrier: Task" },
+ { ST_BARRIER_SPIN_WAIT, "Barrier: Spin wait" },
+ /* Runtime */
+ { ST_RT_ATTACHED, "Runtime: Attached" },
+ { ST_RT_FORK_CALL, "Runtime: Fork call" },
+ { ST_RT_INIT, "Runtime: Initialization" },
+ { ST_RT_MICROTASK_INTERNAL, "Runtime: Internal microtask" },
+ { ST_RT_MICROTASK_USER, "Runtime: User microtask" },
+ { ST_RT_WORKER_LOOP, "Runtime: Worker main loop" },
{ -1, NULL },
};
@@ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = {
};
static const long prv_flags[CH_MAX] = {
- [CH_SUBSYSTEM] = PRV_SKIPDUP,
+ [CH_SUBSYSTEM] = PRV_EMITDUP,
};
static const struct model_pvt_spec pvt_spec = {
@@ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = {
/* ----------------- tracking ------------------ */
static const int th_track[CH_MAX] = {
- [CH_SUBSYSTEM] = TRACK_TH_RUN,
+ [CH_SUBSYSTEM] = TRACK_TH_ACT,
};
static const int cpu_track[CH_MAX] = {
@@ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = {
.prefix = model_name,
.ch_names = chan_name,
.ch_stack = chan_stack,
+ .ch_dup = chan_dup,
.pvt = &pvt_spec,
.track = th_track,
};
@@ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = {
.prefix = model_name,
.ch_names = chan_name,
.ch_stack = chan_stack,
+ .ch_dup = chan_dup,
.pvt = &pvt_spec,
.track = cpu_track,
};
diff --git a/test/rt/CMakeLists.txt b/test/rt/CMakeLists.txt
index 7d11797..d871fa5 100644
--- a/test/rt/CMakeLists.txt
+++ b/test/rt/CMakeLists.txt
@@ -4,3 +4,4 @@
add_subdirectory(nanos6)
add_subdirectory(nodes)
add_subdirectory(nosv)
+add_subdirectory(openmp)
diff --git a/test/rt/openmp/CMakeLists.txt b/test/rt/openmp/CMakeLists.txt
new file mode 100644
index 0000000..1103f8b
--- /dev/null
+++ b/test/rt/openmp/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND)
+check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND)
+cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH)
+
+if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND)
+ if(ENABLE_ALL_TESTS)
+ message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests")
+ else()
+ message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests")
+ endif()
+ return()
+endif()
+
+function(openmp_rt_test)
+ ovni_test(${ARGN})
+ target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv"
+ "-no-pedantic")
+ target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv")
+ target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m")
+ set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
+ ENVIRONMENT "OMP_OVNI=1")
+ set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
+ ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni")
+endfunction()
+
+openmp_rt_test(barrier-explicit.c)
+openmp_rt_test(critical.c)
+openmp_rt_test(if0-nested-task.c)
+openmp_rt_test(if0.c)
+openmp_rt_test(multi-parallels.c)
+openmp_rt_test(parallel-for.c)
+openmp_rt_test(parallel-loop.c)
+openmp_rt_test(parallel-nested.c)
+openmp_rt_test(parallel-task.c)
+openmp_rt_test(sections.c)
+openmp_rt_test(simple-task.c)
+openmp_rt_test(task.c)
+openmp_rt_test(taskloop.c)
+openmp_rt_test(taskwait.c)
+openmp_rt_test(team-distribute.c)
+openmp_rt_test(worksharing-and-tasks.c)
+openmp_rt_test(worksharing-mix.c)
+openmp_rt_test(worksharing-task.c)
+openmp_rt_test(worksharing.c)
+openmp_rt_test(worksharing01.c)
+openmp_rt_test(worksharing02.c)
+openmp_rt_test(worksharing03.c)
diff --git a/test/rt/openmp/barrier-explicit.c b/test/rt/openmp/barrier-explicit.c
new file mode 100644
index 0000000..d403725
--- /dev/null
+++ b/test/rt/openmp/barrier-explicit.c
@@ -0,0 +1,47 @@
+#include
+#include
+#include "compat.h"
+
+#define N 100
+
+static void
+dummy_work(double *x, int i)
+{
+ sleep_us(i);
+ x[i] += sqrt((double) i);
+}
+
+int main(void)
+{
+ double x[N] = { 0 };
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (int i = 0; i < N; i++) {
+ #pragma omp task shared(x)
+ dummy_work(x, i);
+ }
+ }
+
+ sleep_us(200);
+ #pragma omp barrier
+ sleep_us(1000);
+ #pragma omp barrier
+
+ #pragma omp single
+ {
+ for (int i = 0; i < N; i++) {
+ #pragma omp task shared(x)
+ dummy_work(x, i);
+ }
+ }
+ }
+
+ double sum = 0.0;
+ for (int i = 0; i < N; i++)
+ sum += x[i];
+
+ printf("sum = %e\n", sum);
+ return 0;
+}
diff --git a/test/rt/openmp/critical.c b/test/rt/openmp/critical.c
new file mode 100644
index 0000000..9a69370
--- /dev/null
+++ b/test/rt/openmp/critical.c
@@ -0,0 +1,16 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ sleep_us(1000);
+
+ #pragma omp critical
+ sleep_us(200);
+
+ sleep_us(1000);
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/if0-nested-task.c b/test/rt/openmp/if0-nested-task.c
new file mode 100644
index 0000000..d09da74
--- /dev/null
+++ b/test/rt/openmp/if0-nested-task.c
@@ -0,0 +1,20 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task if(0)
+ {
+ #pragma omp task
+ {
+ sleep_us(1000);
+ }
+ #pragma omp taskwait
+ }
+ }
+
+ return 0;
+}
+
diff --git a/test/rt/openmp/if0.c b/test/rt/openmp/if0.c
new file mode 100644
index 0000000..20b9c6b
--- /dev/null
+++ b/test/rt/openmp/if0.c
@@ -0,0 +1,17 @@
+#include
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task if(0)
+ {
+ sleep_us(1000);
+ }
+ }
+
+ return 0;
+}
+
diff --git a/test/rt/openmp/multi-parallels.c b/test/rt/openmp/multi-parallels.c
new file mode 100644
index 0000000..45d9561
--- /dev/null
+++ b/test/rt/openmp/multi-parallels.c
@@ -0,0 +1,13 @@
+#include "compat.h"
+
+int main(void)
+{
+ for (int i = 0; i < 10; i++) {
+ #pragma omp parallel
+ {
+ sleep_us(1000);
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/parallel-for.c b/test/rt/openmp/parallel-for.c
new file mode 100644
index 0000000..c2d74a8
--- /dev/null
+++ b/test/rt/openmp/parallel-for.c
@@ -0,0 +1,29 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ #pragma omp for
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++) {
+ sleep_us(i);
+ }
+
+ #pragma omp for
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++) {
+ sleep_us(i);
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/parallel-loop.c b/test/rt/openmp/parallel-loop.c
new file mode 100644
index 0000000..51d85f4
--- /dev/null
+++ b/test/rt/openmp/parallel-loop.c
@@ -0,0 +1,14 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ #pragma omp loop
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/parallel-nested.c b/test/rt/openmp/parallel-nested.c
new file mode 100644
index 0000000..ca4f144
--- /dev/null
+++ b/test/rt/openmp/parallel-nested.c
@@ -0,0 +1,22 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++) {
+ sleep_us(i);
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++) {
+ sleep_us(i);
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/parallel-task.c b/test/rt/openmp/parallel-task.c
new file mode 100644
index 0000000..7d8b84a
--- /dev/null
+++ b/test/rt/openmp/parallel-task.c
@@ -0,0 +1,26 @@
+#include "compat.h"
+
+static void foo(void)
+{
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++)
+ sleep_us(i);
+
+ #pragma omp single
+ for (int i = 0; i < 100; i++)
+ {
+ #pragma omp task
+ sleep_us(10);
+ }
+}
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ foo();
+ foo();
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/sections.c b/test/rt/openmp/sections.c
new file mode 100644
index 0000000..ed39a49
--- /dev/null
+++ b/test/rt/openmp/sections.c
@@ -0,0 +1,51 @@
+#include
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel sections
+ {
+ #pragma omp section
+ { sleep_us(1001); printf("1001\n"); }
+ #pragma omp section
+ { sleep_us(1002); printf("1002\n"); }
+ #pragma omp section
+ { sleep_us(1003); printf("1003\n"); }
+ #pragma omp section
+ { sleep_us(1004); printf("1004\n"); }
+ #pragma omp section
+ sleep_us(1005);
+ #pragma omp section
+ sleep_us(1006);
+ #pragma omp section
+ sleep_us(1007);
+ #pragma omp section
+ sleep_us(1008);
+ #pragma omp section
+ sleep_us(1009);
+ #pragma omp section
+ sleep_us(1010);
+ #pragma omp section
+ sleep_us(1011);
+ #pragma omp section
+ sleep_us(1012);
+ #pragma omp section
+ sleep_us(1013);
+ #pragma omp section
+ sleep_us(1014);
+ #pragma omp section
+ sleep_us(1015);
+ #pragma omp section
+ sleep_us(1016);
+ #pragma omp section
+ sleep_us(1017);
+ #pragma omp section
+ sleep_us(1018);
+ #pragma omp section
+ sleep_us(1019);
+ #pragma omp section
+ sleep_us(1020);
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/simple-task.c b/test/rt/openmp/simple-task.c
new file mode 100644
index 0000000..0b8a618
--- /dev/null
+++ b/test/rt/openmp/simple-task.c
@@ -0,0 +1,23 @@
+#include "compat.h"
+
+int main(void)
+{
+ int a;
+ int *p = &a;
+
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task depend(out : p[0])
+ {
+ sleep_us(1000);
+ }
+ for (int i = 0; i < 10000; i++)
+ {
+ #pragma omp task depend(in : p[0])
+ sleep_us(1);
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/task.c b/test/rt/openmp/task.c
new file mode 100644
index 0000000..7d82c08
--- /dev/null
+++ b/test/rt/openmp/task.c
@@ -0,0 +1,19 @@
+#include
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ for (int i = 0; i < 10; i++) {
+ #pragma omp task
+ {
+ printf("%d\n", i);
+ sleep_us(100);
+ }
+ }
+ #pragma omp barrier
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/taskloop.c b/test/rt/openmp/taskloop.c
new file mode 100644
index 0000000..8a10d6e
--- /dev/null
+++ b/test/rt/openmp/taskloop.c
@@ -0,0 +1,17 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp taskloop
+ for (int i = 0; i < 10000; i++)
+ {
+ #pragma omp task
+ sleep_us(1);
+ }
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/taskwait.c b/test/rt/openmp/taskwait.c
new file mode 100644
index 0000000..dbba63c
--- /dev/null
+++ b/test/rt/openmp/taskwait.c
@@ -0,0 +1,42 @@
+#include "compat.h"
+#include
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task label("A")
+ {
+ sleep_us(5000);
+ printf("A\n");
+ }
+
+ #pragma omp task label("B")
+ {
+ #pragma omp task label("B1")
+ {
+ sleep_us(2000);
+ printf("B1\n");
+ }
+
+ /* Shouldn't wait for task A */
+ #pragma omp taskwait
+
+ #pragma omp task
+ {
+ sleep_us(1000);
+ printf("B2\n");
+ }
+ }
+
+ #pragma omp task label("C")
+ {
+ printf("C\n");
+ }
+ }
+
+ /* Expected output C B1 B2 A */
+
+ return 0;
+}
diff --git a/test/rt/openmp/team-distribute.c b/test/rt/openmp/team-distribute.c
new file mode 100644
index 0000000..daf5051
--- /dev/null
+++ b/test/rt/openmp/team-distribute.c
@@ -0,0 +1,14 @@
+#include
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp teams num_teams(2)
+ {
+ #pragma omp distribute parallel for
+ for (volatile int i = 0; i < 1000; i++)
+ sleep_us(100 + i);
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing-and-tasks.c b/test/rt/openmp/worksharing-and-tasks.c
new file mode 100644
index 0000000..7d16634
--- /dev/null
+++ b/test/rt/openmp/worksharing-and-tasks.c
@@ -0,0 +1,34 @@
+#include
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ //#pragma omp single nowait
+ for (int i = 0; i < 100; i++) {
+ #pragma omp task
+ sleep_us(10);
+ }
+
+ /* Wait a bit for task allocation */
+ sleep_us(1000);
+
+ /* Occupy 4 CPUs with sections */
+ #pragma omp sections nowait
+ {
+ #pragma omp section
+ { sleep_us(1001); printf("1001\n"); }
+ #pragma omp section
+ { sleep_us(1002); printf("1002\n"); }
+ #pragma omp section
+ { sleep_us(1003); printf("1003\n"); }
+ #pragma omp section
+ { sleep_us(1004); printf("1004\n"); }
+ }
+
+ #pragma omp taskwait
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing-mix.c b/test/rt/openmp/worksharing-mix.c
new file mode 100644
index 0000000..86c4e9c
--- /dev/null
+++ b/test/rt/openmp/worksharing-mix.c
@@ -0,0 +1,66 @@
+#include
+#include
+#include "compat.h"
+
+/* Test several work-distribution and task constructs, so we can generate a
+ * trace that includes most of the states. */
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ #pragma omp for
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+
+ #pragma omp sections
+ {
+ #pragma omp section
+ { sleep_us(101); printf("101\n"); }
+ #pragma omp section
+ { sleep_us(102); printf("102\n"); }
+ #pragma omp section
+ { sleep_us(103); printf("103\n"); }
+ #pragma omp section
+ { sleep_us(104); printf("104\n"); }
+ }
+
+ #pragma omp for
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+
+ #pragma omp single
+ for (int i = 0; i < 100; i++)
+ {
+ #pragma omp task
+ sleep_us(10);
+ }
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp critical
+ sleep_us(20);
+
+ #pragma omp barrier
+
+ #pragma omp for
+ for (int i = 0; i < 100; i++) {
+ sleep_us(1);
+ }
+ #pragma omp for schedule(dynamic, 1)
+ for (int i = 0; i < 100; i++) {
+ sleep_us(i);
+ }
+ }
+
+ // FIXME: Crashes OpenMP-V runtime
+ //#pragma omp distribute parallel for
+ //for (int i = 0; i < 1000; i++) {
+ // sleep_us(1);
+ //}
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing-task.c b/test/rt/openmp/worksharing-task.c
new file mode 100644
index 0000000..1e10834
--- /dev/null
+++ b/test/rt/openmp/worksharing-task.c
@@ -0,0 +1,25 @@
+#include
+#include "compat.h"
+
+static void foo(void)
+{
+ #pragma omp for
+ for (int i = 0; i < 100; ++i)
+ {
+ #pragma omp task
+ {
+ sleep_us(1);
+ }
+ }
+}
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ foo();
+ foo();
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing.c b/test/rt/openmp/worksharing.c
new file mode 100644
index 0000000..7a740a4
--- /dev/null
+++ b/test/rt/openmp/worksharing.c
@@ -0,0 +1,16 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ {
+ #pragma omp for schedule(dynamic) ordered label("omp for dynamic")
+ for (int i = 0; i < 100; i++)
+ sleep_us(100);
+
+ #pragma omp single label("single")
+ sleep_us(1000);
+ }
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing01.c b/test/rt/openmp/worksharing01.c
new file mode 100644
index 0000000..d214fda
--- /dev/null
+++ b/test/rt/openmp/worksharing01.c
@@ -0,0 +1,11 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp for schedule(static, 30)
+ for (int i = 0; i < 100; i++)
+ sleep_us(10);
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing02.c b/test/rt/openmp/worksharing02.c
new file mode 100644
index 0000000..f32c67e
--- /dev/null
+++ b/test/rt/openmp/worksharing02.c
@@ -0,0 +1,12 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp target
+ #pragma omp teams num_teams(1)
+ #pragma omp distribute dist_schedule(static, 30)
+ for (int i = 0; i < 100; i++)
+ sleep_us(10);
+
+ return 0;
+}
diff --git a/test/rt/openmp/worksharing03.c b/test/rt/openmp/worksharing03.c
new file mode 100644
index 0000000..a9fa983
--- /dev/null
+++ b/test/rt/openmp/worksharing03.c
@@ -0,0 +1,11 @@
+#include "compat.h"
+
+int main(void)
+{
+ #pragma omp parallel
+ #pragma omp for schedule(dynamic)
+ for (int i = 0; i < 100; i++)
+ sleep_us(10);
+
+ return 0;
+}