Update OpenMP emulation model
Add more tests, subsystem states and documentation.
This commit is contained in:
parent
bf2b3b73a0
commit
55318d9da7
@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems
|
||||
and only works with the OpenMP-V runtime, on top of nOS-V).
|
||||
|
||||
### Changed
|
||||
|
||||
- Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}.
|
||||
|
@ -16,6 +16,8 @@ window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_custom_color_enabled true
|
||||
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
|
@ -4,9 +4,9 @@ ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread >
|
||||
< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread >
|
||||
################################################################################
|
||||
window_name Thread: OpenMP subsystem of the RUNNING thread
|
||||
window_name Thread: OpenMP subsystem of the ACTIVE thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 0
|
||||
@ -16,6 +16,8 @@ window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_custom_color_enabled true
|
||||
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126}
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
@ -38,5 +40,5 @@ window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 50
|
||||
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread"
|
||||
window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread"
|
||||
|
||||
|
@ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`:
|
||||
|
||||
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
|
||||
<dl>
|
||||
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
|
||||
<dd>enters the attached state</dd>
|
||||
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
|
||||
<dd>leaves the attached state</dd>
|
||||
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
|
||||
<dd>enters a join barrier</dd>
|
||||
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
|
||||
<dd>leaves a join barrier</dd>
|
||||
<dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt>
|
||||
<dd>enters a barrier</dd>
|
||||
<dd>begins plain barrier</dd>
|
||||
<dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt>
|
||||
<dd>leaves a barrier</dd>
|
||||
<dd>ceases plain barrier</dd>
|
||||
<dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt>
|
||||
<dd>begins join barrier</dd>
|
||||
<dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt>
|
||||
<dd>ceases join barrier</dd>
|
||||
<dt><a id="PBf" href="#PBf"><pre>PBf</pre></a></dt>
|
||||
<dd>begins fork barrier</dd>
|
||||
<dt><a id="PBF" href="#PBF"><pre>PBF</pre></a></dt>
|
||||
<dd>ceases fork barrier</dd>
|
||||
<dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt>
|
||||
<dd>enters a tasking barrier</dd>
|
||||
<dd>begins tasking barrier</dd>
|
||||
<dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt>
|
||||
<dd>leaves a tasking barrier</dd>
|
||||
<dd>ceases tasking barrier</dd>
|
||||
<dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt>
|
||||
<dd>enters a spin wait</dd>
|
||||
<dd>begins spin wait</dd>
|
||||
<dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt>
|
||||
<dd>leaves a spin wait</dd>
|
||||
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
|
||||
<dd>begins static for</dd>
|
||||
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
|
||||
<dd>ceases static for</dd>
|
||||
<dd>ceases spin wait</dd>
|
||||
<dt><a id="PIa" href="#PIa"><pre>PIa</pre></a></dt>
|
||||
<dd>begins critical acquiring</dd>
|
||||
<dt><a id="PIA" href="#PIA"><pre>PIA</pre></a></dt>
|
||||
<dd>ceases critical acquiring</dd>
|
||||
<dt><a id="PIr" href="#PIr"><pre>PIr</pre></a></dt>
|
||||
<dd>begins critical releasing</dd>
|
||||
<dt><a id="PIR" href="#PIR"><pre>PIR</pre></a></dt>
|
||||
<dd>ceases critical releasing</dd>
|
||||
<dt><a id="PI[" href="#PI["><pre>PI[</pre></a></dt>
|
||||
<dd>begins critical section</dd>
|
||||
<dt><a id="PI]" href="#PI]"><pre>PI]</pre></a></dt>
|
||||
<dd>ceases critical section</dd>
|
||||
<dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt>
|
||||
<dd>begins dynamic for init</dd>
|
||||
<dd>begins distribute</dd>
|
||||
<dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt>
|
||||
<dd>ceases distribute</dd>
|
||||
<dt><a id="PWy" href="#PWy"><pre>PWy</pre></a></dt>
|
||||
<dd>begins dynamic for init</dd>
|
||||
<dt><a id="PWY" href="#PWY"><pre>PWY</pre></a></dt>
|
||||
<dd>ceases dynamic for init</dd>
|
||||
<dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt>
|
||||
<dd>begins dynamic for chunk</dd>
|
||||
<dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt>
|
||||
<dd>ceases dynamic for chunk</dd>
|
||||
<dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt>
|
||||
<dd>begins static for</dd>
|
||||
<dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt>
|
||||
<dd>ceases static for</dd>
|
||||
<dt><a id="PWe" href="#PWe"><pre>PWe</pre></a></dt>
|
||||
<dd>begins section</dd>
|
||||
<dt><a id="PWE" href="#PWE"><pre>PWE</pre></a></dt>
|
||||
<dd>ceases section</dd>
|
||||
<dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt>
|
||||
<dd>begins single</dd>
|
||||
<dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt>
|
||||
<dd>ceases single</dd>
|
||||
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
|
||||
<dd>begins releasing task dependencies</dd>
|
||||
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
|
||||
<dd>ceases releasing task dependencies</dd>
|
||||
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
|
||||
<dd>begins waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
|
||||
<dd>ceases waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
|
||||
<dd>begins invoking a task</dd>
|
||||
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
|
||||
<dd>ceases invoking a task</dd>
|
||||
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
|
||||
<dd>begins invoking an if0 task</dd>
|
||||
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
|
||||
<dd>ceases invoking an if0 task</dd>
|
||||
<dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt>
|
||||
<dd>begins task allocation</dd>
|
||||
<dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt>
|
||||
<dd>ceases task allocation</dd>
|
||||
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
|
||||
<dd>begins scheduling a task</dd>
|
||||
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
|
||||
<dd>ceases scheduling a task</dd>
|
||||
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
|
||||
<dd>enters a taskwait</dd>
|
||||
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
|
||||
<dd>leaves a taskwait</dd>
|
||||
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
|
||||
<dd>enters a taskyield</dd>
|
||||
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
|
||||
<dd>leaves a taskyield</dd>
|
||||
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
|
||||
<dd>begins duplicating a task</dd>
|
||||
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
|
||||
<dd>ceases duplicating a task</dd>
|
||||
<dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt>
|
||||
<dd>begins checking task dependencies</dd>
|
||||
<dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt>
|
||||
<dd>ceases checking task dependencies</dd>
|
||||
<dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt>
|
||||
<dd>begins duplicating a task</dd>
|
||||
<dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt>
|
||||
<dd>ceases duplicating a task</dd>
|
||||
<dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt>
|
||||
<dd>begins releasing task dependencies</dd>
|
||||
<dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt>
|
||||
<dd>ceases releasing task dependencies</dd>
|
||||
<dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt>
|
||||
<dd>begins running a task</dd>
|
||||
<dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt>
|
||||
<dd>ceases running a task</dd>
|
||||
<dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt>
|
||||
<dd>begins running an if0 task</dd>
|
||||
<dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt>
|
||||
<dd>ceases running an if0 task</dd>
|
||||
<dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt>
|
||||
<dd>begins scheduling a task</dd>
|
||||
<dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt>
|
||||
<dd>ceases scheduling a task</dd>
|
||||
<dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt>
|
||||
<dd>enters a taskgroup</dd>
|
||||
<dd>begins a taskgroup</dd>
|
||||
<dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt>
|
||||
<dd>leaves a taskgroup</dd>
|
||||
<dd>ceases a taskgroup</dd>
|
||||
<dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt>
|
||||
<dd>begins a taskwait</dd>
|
||||
<dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt>
|
||||
<dd>ceases a taskwait</dd>
|
||||
<dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt>
|
||||
<dd>begins waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt>
|
||||
<dd>ceases waiting for taskwait dependencies</dd>
|
||||
<dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt>
|
||||
<dd>begins a taskyield</dd>
|
||||
<dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt>
|
||||
<dd>ceases a taskyield</dd>
|
||||
<dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt>
|
||||
<dd>enters the attached state</dd>
|
||||
<dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt>
|
||||
<dd>leaves the attached state</dd>
|
||||
<dt><a id="PMi" href="#PMi"><pre>PMi</pre></a></dt>
|
||||
<dd>begins microtask internal</dd>
|
||||
<dt><a id="PMI" href="#PMI"><pre>PMI</pre></a></dt>
|
||||
<dd>ceases microtask internal</dd>
|
||||
<dt><a id="PMu" href="#PMu"><pre>PMu</pre></a></dt>
|
||||
<dd>begins microtask user code</dd>
|
||||
<dt><a id="PMU" href="#PMU"><pre>PMU</pre></a></dt>
|
||||
<dd>ceases microtask user code</dd>
|
||||
<dt><a id="PH[" href="#PH["><pre>PH[</pre></a></dt>
|
||||
<dd>begins worker loop</dd>
|
||||
<dt><a id="PH]" href="#PH]"><pre>PH]</pre></a></dt>
|
||||
<dd>ceases worker loop</dd>
|
||||
<dt><a id="PCf" href="#PCf"><pre>PCf</pre></a></dt>
|
||||
<dd>begins fork call</dd>
|
||||
<dt><a id="PCF" href="#PCF"><pre>PCF</pre></a></dt>
|
||||
<dd>ceases fork call</dd>
|
||||
<dt><a id="PCi" href="#PCi"><pre>PCi</pre></a></dt>
|
||||
<dd>begins initialization</dd>
|
||||
<dt><a id="PCI" href="#PCI"><pre>PCI</pre></a></dt>
|
||||
<dd>ceases initialization</dd>
|
||||
</dl>
|
||||
|
||||
## Model tampi
|
||||
|
BIN
doc/user/emulation/fig/openmp-subsystem.png
Normal file
BIN
doc/user/emulation/fig/openmp-subsystem.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.4 KiB |
@ -1,164 +1,243 @@
|
||||
# OpenMP Model
|
||||
# OpenMP model
|
||||
|
||||
The LLVM OpenMP Runtime is an integral component of the LLVM compiler
|
||||
infrastructure that provides support for the OpenMP (Open Multi-Processing)
|
||||
programming model.
|
||||
The [OpenMP programming model](https://www.openmp.org) is a widely used API and
|
||||
set of directives for parallel programming, allowing developers to write
|
||||
multi-threaded and multi-process applications more easily. In this document we
|
||||
refer to the
|
||||
[version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf).
|
||||
|
||||
OpenMP is a widely used API and set of directives for parallel programming,
|
||||
allowing developers to write multi-threaded and multi-process applications more
|
||||
easily.
|
||||
The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides
|
||||
an implementation of the OpenMP specification as a component of the LLVM
|
||||
compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top
|
||||
of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the
|
||||
[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**.
|
||||
|
||||
This documentation is about an OpenMP runtime built on top of [nOS-V][nosv],
|
||||
leveraging its thread management capabilities while retaining the fundamental
|
||||
characteristics of the original runtime.
|
||||
We have added instrumentation events to OpenMP-V designed to be enabled along
|
||||
the [nOS-V instrumentation](nosv.md). This document describes all the
|
||||
instrumentation features included in our modified OpenMP-V runtime to identify
|
||||
what is happening. This data is useful for both users and developers of the
|
||||
OpenMP runtime to analyze issues and undesired behaviors.
|
||||
|
||||
While the modifications introduced to the runtime may appear to be minor, it's
|
||||
important to note that this enhanced version is not API compatible with the
|
||||
original runtime. As a result, it is mandatory to use the clang built in the same
|
||||
[LLVM Project][llvm].
|
||||
!!! Note
|
||||
|
||||
This document describes all the instrumentation features included in the runtime
|
||||
by both nOS-V and OpenMP to monitor task execution and the execution flow within
|
||||
the runtime library to identify what is happening. This data is useful for both
|
||||
users and developers of the OpenMP runtime to analyze issues and undesired
|
||||
behaviors.
|
||||
Instrumenting the original OpenMP runtime from the LLVM project is planned
|
||||
but is not yet possible. For now you must use the modified OpenMP-V runtime
|
||||
with nOS-V.
|
||||
|
||||
[llvm]: https://pm.bsc.es/llvm-ompss
|
||||
[nosv]: https://gitlab.bsc.es/nos-v/nos-v
|
||||
## Enable the instrumentation
|
||||
|
||||
## How to Generate Execution Traces
|
||||
To generate runtime traces, you will have to:
|
||||
|
||||
In order to build the OpenMP runtime nOS-V must be provided by using
|
||||
`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a
|
||||
runtime without instrumentation. However, the user may be able to generate
|
||||
execution traces by enabling nOS-V instrumentation through
|
||||
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a
|
||||
nOS-V installation built with ovni.
|
||||
1. **Build nOS-V with ovni support:** Refer to the
|
||||
[nOS-V
|
||||
documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md).
|
||||
Typically you should use the `--with-ovni` option at configure time to specify
|
||||
where ovni is installed.
|
||||
2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
|
||||
environment variable to specify the nOS-V and ovni installation
|
||||
when configuring CMake.
|
||||
3. **Enable the instrumentation in nOS-V at runtime:** Refer to the
|
||||
[nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md)
|
||||
to find out how to enable the tracing at runtime. Typically you can just set
|
||||
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
|
||||
4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment
|
||||
variable `OMP_OVNI=1`.
|
||||
|
||||
Building OpenMP with instrumentation requires to pass ovni pkg-config path to
|
||||
`PKG_CONFIG_PATH` with a nosv installation compiled with ovni too. The reason is
|
||||
because OpenMP is dependent of nOS-V to generate complete execution traces.
|
||||
Currently there is only support for the subsystem view, which is documented
|
||||
below. The view is complemented with the information of [nOS-V views](nosv.md),
|
||||
as OpenMP-V uses nOS-V tasks to run the workers.
|
||||
|
||||
By default, OpenMP will not instrument anything. To enable instrumentation the
|
||||
user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
|
||||
## Subsystem view
|
||||
|
||||
The following sections will describe the OpenMP execution trace views and what
|
||||
information is shown there.
|
||||
|
||||
## nOS-V Task Type
|
||||
|
||||
As said in the previous sections, this OpenMP runtime is built on top of nOS-V.
|
||||
So the user can explore what does the execution do there. Here we only describe
|
||||
the task type view. For other views please take a look at the nOS-V chapter.
|
||||
|
||||
In OpenMP, every thread that is launched (main thread included) is shown in a task
|
||||
type with label "openmp". In a task application, every task call will be seen with
|
||||
a task type with label "file:line:col" format referring to the pragma location. This
|
||||
can be changed by using the clause label(string-literal).
|
||||
|
||||
OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for
|
||||
more information. Nevertheless, the OpenMP task view shows it.
|
||||
|
||||
## OpenMP Subsystem
|
||||
![Subsystem view example](fig/openmp-subsystem.png)
|
||||
|
||||
This view illustrates the activities of each thread with different states:
|
||||
|
||||
- **Attached**: The thread is attached.
|
||||
- **Work-distribution subsystem**: Related to work-distribution constructs,
|
||||
[in Chapter 11][workdis].
|
||||
|
||||
- **Join barrier**: The thread is in the implicit barrier of the parallel region.
|
||||
- **Distribute**: Running a *Distribute* region.
|
||||
|
||||
- **Tasking barrier**: The thread is in the additional tasking barrier trying to
|
||||
execute tasks. This event happens if executed with KMP_TASKING=1.
|
||||
- **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often
|
||||
involve running more than one iteration of the loop. See the
|
||||
[limitations](#dynamic_for) below.
|
||||
|
||||
- **Spin wait**: The thread spin waits for a condition. Usually this event happens
|
||||
in a barrier while waiting for the other threads to reach the barrier. The thread
|
||||
also tries to execute tasks.
|
||||
- **Dynamic for initialization**: Preparing a dynamic *for*.
|
||||
|
||||
- **For static**: Executing a for static. The length of the event represents all the
|
||||
chunks of iterations executed by the thread. See "Limitations" section.
|
||||
- **Static for chunk**: Executing the assigned iterations of a static
|
||||
*for*.
|
||||
|
||||
- **For dynamic init**: Running the initialization of an OpenMP for dynamic.
|
||||
- **Single**: Running a *Single* region. All threads of the parallel region
|
||||
participate.
|
||||
|
||||
- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To
|
||||
clarify. If a thread executes two chunks of iterations, let's say from 1 to 4 and
|
||||
from 8 to 12, two different events will be shown. See "Limitations" section.
|
||||
- **Section**: Running a *Section* region. All threads of the parallel region
|
||||
participate.
|
||||
|
||||
- **Single**: Running a Single region. All threads of the parallel region will emit
|
||||
the event.
|
||||
- **Task subsystem**: Related to tasking constructs, [in Chapter 12][tasking].
|
||||
|
||||
- **Release deps**: When finishing a task, trying to release dependencies. This
|
||||
event happens although the task has no dependencies.
|
||||
- **Allocation**: Allocating the task descriptor.
|
||||
|
||||
- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled.
|
||||
This appears typically in a task if0 with dependencies or a taskwait with deps.
|
||||
- **Check deps**: Checking if the task has pending dependencies to be
|
||||
fulfilled. When all dependencies are fulfilled the task will be scheduled.
|
||||
|
||||
- **Invoke task**: Executing a task.
|
||||
- **Duplicating**: Duplicating the task descriptor in a taskloop.
|
||||
|
||||
- **Invoke task if0**: Executing a task if0.
|
||||
- **Releasing deps**: Releasing dependencies at the end of a task. This
|
||||
state is always present even if the task has no dependencies.
|
||||
|
||||
- **Task alloc**: Allocating the task descriptor.
|
||||
- **Running task**: Executing a task.
|
||||
|
||||
- **Task schedule**: Adding the task to the scheduler.
|
||||
- **Running task if0**: Executing a task if0.
|
||||
|
||||
- **Taskwait**: Running a taskwait.
|
||||
- **Scheduling**: Adding the task to the scheduler for execution.
|
||||
|
||||
- **Taskyield**: Running a taskyield.
|
||||
- **Taskgroup**: Waiting in a *taskgroup* construct.
|
||||
|
||||
- **Task dup alloc**: Duplicating the task descriptor in a taskloop.
|
||||
- **Taskwait**: Waiting in a *taskwait* construct.
|
||||
|
||||
- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This
|
||||
means that if all dependencies are fulfilled the task will be scheduled.
|
||||
- **Taskwait deps**: Trying to execute tasks until dependencies have been
|
||||
fulfilled. This appears typically in a task if0 with dependencies or a
|
||||
taskwait with deps.
|
||||
|
||||
- **Taskyield**: Performing a *taskyield* construct.
|
||||
|
||||
- **Taskgroup**: Running a taskgroup.
|
||||
- **Critical subsystem**: Related to the *critical* Construct, in [Section 15.2][critical].
|
||||
|
||||
- **Acquiring**: Waiting to acquire a *Critical* section.
|
||||
|
||||
- **Section**: Running the *Critical* section.
|
||||
|
||||
- **Releasing**: Waiting to release a *Critical* section.
|
||||
|
||||
- **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier].
|
||||
**All barriers can try to execute tasks**.
|
||||
|
||||
- **Barrier: Fork**: Workers wait for a release signal from the master thread to
|
||||
continue. The master can continue as soon as it signals the workers. It is
|
||||
done at the beginning of a fork-join region.
|
||||
|
||||
- **Barrier: Join**: The master thread waits until all workers finish their work.
|
||||
Workers can continue as soon as they signal the master. It is done at the
|
||||
end of a fork-join region.
|
||||
|
||||
- **Barrier: Plain**: Performing a plain barrier, which waits for a release
|
||||
signal from the master thread to continue. It is done at the beginning of
|
||||
a fork-join region, in the `__kmp_join_barrier()` function.
|
||||
|
||||
- **Barrier: Task**: Blocked in an additional tasking barrier *until all previous
|
||||
tasks have been executed*. Only happens when executed with `KMP_TASKING=1`.
|
||||
|
||||
- **Runtime subsystem**: Internal operations of the runtime.
|
||||
|
||||
- **Attached**: Present after the call to `nosv_attach()` and before
|
||||
`nosv_detach()`. This state is a hack.
|
||||
|
||||
- **Fork call**: Preparing a parallel section using the fork-join model.
|
||||
Only called from the master thread.
|
||||
|
||||
- **Init**: Initializing the OpenMP-V runtime.
|
||||
|
||||
- **Internal microtask**: Running a internal OpenMP-V function as a microtask.
|
||||
|
||||
- **User microtask**: Running user code as a microtask in a worker thread.
|
||||
|
||||
- **Worker main Loop**: Running the main loop, where the workers run the
|
||||
fork barrier, run a microtask and perform a join barrier until there is no
|
||||
more work.
|
||||
|
||||
!!! Note
|
||||
|
||||
The generated HTML version of the OpenMP 5.2 specification has some parts
|
||||
missing, so we link directly to the PDF file which may not work in some
|
||||
browsers.
|
||||
|
||||
[workdis]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11
|
||||
[tasking]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12
|
||||
[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2
|
||||
[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3
|
||||
|
||||
## Limitations
|
||||
|
||||
Due to the way OpenMP is implemented, there are some instrumentation points that
|
||||
violate ovni subsystem rules. This mostly happens because some directives are lowered
|
||||
partially in the transformed user code, so it is not easy to wrap them into a
|
||||
Single-entry single-exit (SESE) region, like we would do with a regular task invocation,
|
||||
for example.
|
||||
As the compiler generates the code that perform the calls to the OpenMP-V
|
||||
runtime, there are some parts of the execution that are complicated to
|
||||
instrument by just placing a pair of events to delimit a function.
|
||||
|
||||
All problematic directives are described here so the user is able to understand what
|
||||
is being shown in the traces.
|
||||
For those cases we use an approximation which is documented in the following
|
||||
subsections.
|
||||
|
||||
- **Task if0**: The lowered user code of a task if0 is:
|
||||
... = __kmpc_omp_task_alloc(...);
|
||||
__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
|
||||
__kmpc_omp_task_begin_if0(...);
|
||||
// Call to the user code
|
||||
omp_task_entry_(...);
|
||||
__kmpc_omp_task_complete_if0(...);
|
||||
### Dynamic for
|
||||
|
||||
Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0`
|
||||
and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent.
|
||||
The generated code of a *dynamic for* has the following structure:
|
||||
|
||||
- **For static**: The lowered user code of a for static is:
|
||||
// Parallel code
|
||||
__kmpc_for_static_init_4(...);
|
||||
for ( i = ...; i <= ...; ++i )
|
||||
;
|
||||
__kmpc_for_static_fini(...);
|
||||
```c
|
||||
__kmpc_dispatch_init_4(...);
|
||||
while (__kmpc_dispatch_next_4(...)) {
|
||||
for (i = ...; i <= ...; i++) {
|
||||
// User code ...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4`
|
||||
and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent.
|
||||
The function `__kmpc_dispatch_next_4()` returns `true` if there are more
|
||||
chunks (group of iterations) to be executed by the thread, otherwise it returns
|
||||
`false`.
|
||||
|
||||
- **For dynamic**: The lowered user code of a for dynamic is:
|
||||
Ideally we want to instrument each chunk with a pair of begin and end events.
|
||||
|
||||
__kmpc_dispatch_init_4(...);
|
||||
while ( __kmpc_dispatch_next_4(...))
|
||||
{
|
||||
for ( i = ...; i <= ...; ++i )
|
||||
;
|
||||
}
|
||||
The problem with the instrumentation is that there is no easy way of determining
|
||||
if the call to `__kmpc_dispatch_next_4()` is processing the first chunk, just
|
||||
after `__kmpc_dispatch_init_4()`, or is coming from other chunks due to the
|
||||
while loop.
|
||||
|
||||
Ideally, the for loop should be called by the runtime to ensure the SESE structure. As
|
||||
this code is generated by the compiler the subsystem view shows:
|
||||
1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init**
|
||||
2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`.
|
||||
with the event **For dynamic chunk**.
|
||||
3. How long it takes to run a loop iteration chunk between the last and the previous
|
||||
`__kmpc_dispatch_next_4` call with the event **For dynamic chunk**.
|
||||
Therefore, from the `__kmpc_dispatch_next_4()` alone, we cannot determine if we
|
||||
need to only emit a single "begin a new chunk" event or we need to emit the pair
|
||||
of events "finish the last chunk" and "begin a new one".
|
||||
|
||||
So, as a workaround, we emit an event from the end of `__kmpc_dispatch_init_4()`
|
||||
starting a new chunk (which is fake), and then from `__kmpc_dispatch_next_4()` we
|
||||
always emit the "finish the last chunk" and "begin a new one" events (unless
|
||||
there are no more chunks, in which case we don't emit the "begin a new one"
|
||||
event).
|
||||
|
||||
This will cause a spurious *Work-distribution: Dynamic for chunk* state at the
|
||||
beginning of each dynamic for, which should be very short and is not really a
|
||||
chunk.
|
||||
|
||||
### Static for
|
||||
|
||||
The generated code of a *static for* has the following structure:
|
||||
|
||||
```c
|
||||
__kmpc_for_static_init_4(...);
|
||||
for (i = ...; i <= ...; i++) {
|
||||
// User code ...
|
||||
}
|
||||
__kmpc_for_static_fini(...);
|
||||
```
|
||||
|
||||
As this code is generated by the compiler we cannot easily add the begin/end
|
||||
pair of events to mark the *Work-distribution: Static for chunk* state.
|
||||
|
||||
We assume that by placing the "begin processing a chunk" event at the end of
|
||||
`__kmpc_for_static_init_4()` and the "end processing the chunk" event at
|
||||
the beginning of `__kmpc_for_static_fini()` is equivalent to adding the
|
||||
events surrounding the for loop.
|
||||
|
||||
### Task if0
|
||||
|
||||
The generated code of an *if0 task* has the following structure:
|
||||
|
||||
```c
|
||||
... = __kmpc_omp_task_alloc(...);
|
||||
__kmpc_omp_taskwait_deps_51(...); // If task has dependencies
|
||||
__kmpc_omp_task_begin_if0(...);
|
||||
// Call to the user code
|
||||
omp_task_entry_(...);
|
||||
__kmpc_omp_task_complete_if0(...);
|
||||
```
|
||||
|
||||
Instead of injecting the begin and end events in the user code, we
|
||||
approximate it by placing the "begin if0 task" event at the end of the
|
||||
`__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the
|
||||
beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as
|
||||
*Task: Running task if0*.
|
||||
|
@ -7,11 +7,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1701968480,
|
||||
"narHash": "sha256-YoKN8FZllNQfpEpMqGOBv77kp9J0mlVRlhixWbcDqWg=",
|
||||
"lastModified": 1705310446,
|
||||
"narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "c4d5135fde108401417fdcdf5e1c8d11aeca4f32",
|
||||
"revCount": 931,
|
||||
"rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f",
|
||||
"revCount": 932,
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
},
|
||||
|
35
flake.nix
35
flake.nix
@ -7,11 +7,15 @@
|
||||
|
||||
outputs = { self, nixpkgs, bscpkgs }:
|
||||
let
|
||||
# Set to true to replace all libovni in all runtimes with the current
|
||||
# source. Causes large rebuilds on changes of ovni.
|
||||
useLocalOvni = false;
|
||||
|
||||
ovniOverlay = final: prev: {
|
||||
nosv = prev.nosv.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "6a63fd4378ba458243dda3159500c1450edf0e82";
|
||||
gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b";
|
||||
};
|
||||
nanos6 = prev.nanos6.override {
|
||||
useGit = true;
|
||||
@ -23,13 +27,27 @@
|
||||
gitBranch = "master";
|
||||
gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f";
|
||||
};
|
||||
clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886";
|
||||
};
|
||||
|
||||
# Use a fixed commit for libovni
|
||||
ovniFixed = prev.ovni.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e";
|
||||
};
|
||||
# Build with the current source
|
||||
ovni = prev.ovni.overrideAttrs (old: rec {
|
||||
ovniLocal = prev.ovni.overrideAttrs (old: rec {
|
||||
pname = "ovni-local";
|
||||
version = if self ? shortRev then self.shortRev else "dirty";
|
||||
src = self;
|
||||
cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ];
|
||||
});
|
||||
# Select correct ovni for libovni
|
||||
ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed;
|
||||
};
|
||||
pkgs = import nixpkgs {
|
||||
system = "x86_64-linux";
|
||||
@ -51,12 +69,12 @@
|
||||
];
|
||||
lib = pkgs.lib;
|
||||
in {
|
||||
packages.x86_64-linux.ovniPackages = rec {
|
||||
packages.x86_64-linux.ovniPackages = {
|
||||
# Allow inspection of packages from the command line
|
||||
inherit pkgs;
|
||||
} // rec {
|
||||
# Build with the current source
|
||||
local = pkgs.ovni.overrideAttrs (old: {
|
||||
pname = "ovni-local";
|
||||
src = self;
|
||||
});
|
||||
local = pkgs.ovniLocal;
|
||||
|
||||
# Build in Debug mode
|
||||
debug = local.overrideAttrs (old: {
|
||||
@ -97,12 +115,13 @@
|
||||
# We need to be able to exit the chroot to run Nanos6 tests, as they
|
||||
# require access to /sys for hwloc
|
||||
__noChroot = true;
|
||||
buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes ]);
|
||||
buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]);
|
||||
cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ];
|
||||
preConfigure = old.preConfigure or "" + ''
|
||||
export NOSV_HOME="${pkgs.nosv}"
|
||||
export NODES_HOME="${pkgs.nodes}"
|
||||
export NANOS6_HOME="${pkgs.nanos6}"
|
||||
export OPENMP_RUNTIME="libompv"
|
||||
'';
|
||||
});
|
||||
|
||||
|
@ -37,6 +37,7 @@ nav:
|
||||
- user/emulation/nanos6.md
|
||||
- user/emulation/tampi.md
|
||||
- user/emulation/mpi.md
|
||||
- user/emulation/openmp.md
|
||||
- user/emulation/events.md
|
||||
- CHANGELOG.md
|
||||
- 'Developer guide':
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "openmp_priv.h"
|
||||
@ -14,53 +14,83 @@
|
||||
enum { PUSH = 1, POP = 2, IGN = 3 };
|
||||
|
||||
static const int fn_table[256][256][3] = {
|
||||
['A'] = {
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED },
|
||||
},
|
||||
['B'] = {
|
||||
['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER },
|
||||
['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER },
|
||||
['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER },
|
||||
['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT },
|
||||
['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN },
|
||||
['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER_PLAIN },
|
||||
['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN },
|
||||
['J'] = { CH_SUBSYSTEM, POP, ST_BARRIER_JOIN },
|
||||
['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK },
|
||||
['F'] = { CH_SUBSYSTEM, POP, ST_BARRIER_FORK },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_BARRIER_TASK },
|
||||
['s'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
|
||||
['S'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT },
|
||||
},
|
||||
['I'] = {
|
||||
['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ },
|
||||
['A'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_ACQ },
|
||||
['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL },
|
||||
['R'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_REL },
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_SECTION },
|
||||
},
|
||||
['W'] = {
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC },
|
||||
['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE },
|
||||
['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_WD_DISTRIBUTE },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_CHUNK },
|
||||
['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT },
|
||||
['Y'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_INIT },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_STATIC },
|
||||
['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION },
|
||||
['E'] = { CH_SUBSYSTEM, POP, ST_WD_SECTION },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_WD_SINGLE },
|
||||
},
|
||||
['T'] = {
|
||||
['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS },
|
||||
['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS },
|
||||
['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS },
|
||||
['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS },
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 },
|
||||
['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC },
|
||||
['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT },
|
||||
['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD },
|
||||
['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD },
|
||||
['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_TASK_CHECK_DEPS },
|
||||
['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
|
||||
['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS },
|
||||
['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS },
|
||||
['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP },
|
||||
['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP },
|
||||
['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC },
|
||||
['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS },
|
||||
['R'] = { CH_SUBSYSTEM, POP, ST_TASK_RELEASE_DEPS },
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN },
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN_IF0 },
|
||||
['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE },
|
||||
['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE },
|
||||
['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP },
|
||||
['G'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKGROUP },
|
||||
['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT },
|
||||
['T'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT },
|
||||
['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS },
|
||||
['W'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT_DEPS },
|
||||
['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD },
|
||||
['Y'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKYIELD },
|
||||
},
|
||||
['A'] = {
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_RT_ATTACHED },
|
||||
},
|
||||
['M'] = {
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_INTERNAL },
|
||||
['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER },
|
||||
['U'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_USER },
|
||||
},
|
||||
['H'] = {
|
||||
['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP },
|
||||
[']'] = { CH_SUBSYSTEM, POP, ST_RT_WORKER_LOOP },
|
||||
},
|
||||
['C'] = {
|
||||
['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT },
|
||||
['I'] = { CH_SUBSYSTEM, POP, ST_RT_INIT },
|
||||
['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL },
|
||||
['F'] = { CH_SUBSYSTEM, POP, ST_RT_FORK_CALL },
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef OPENMP_PRIV_H
|
||||
@ -15,28 +15,42 @@ enum openmp_chan {
|
||||
CH_MAX,
|
||||
};
|
||||
|
||||
|
||||
enum openmp_function_values {
|
||||
ST_ATTACHED = 1,
|
||||
ST_JOIN_BARRIER,
|
||||
ST_BARRIER,
|
||||
ST_TASKING_BARRIER,
|
||||
ST_SPIN_WAIT,
|
||||
ST_FOR_STATIC,
|
||||
ST_FOR_DYNAMIC_INIT,
|
||||
ST_FOR_DYNAMIC_CHUNK,
|
||||
ST_SINGLE,
|
||||
ST_RELEASE_DEPS,
|
||||
ST_TASKWAIT_DEPS,
|
||||
ST_INVOKE_TASK,
|
||||
ST_INVOKE_TASK_IF0,
|
||||
ST_BARRIER_FORK = 1,
|
||||
ST_BARRIER_JOIN,
|
||||
ST_BARRIER_PLAIN,
|
||||
ST_BARRIER_SPIN_WAIT,
|
||||
ST_BARRIER_TASK,
|
||||
/* Critical */
|
||||
ST_CRITICAL_ACQ,
|
||||
ST_CRITICAL_REL,
|
||||
ST_CRITICAL_SECTION,
|
||||
/* Work-distribution */
|
||||
ST_WD_DISTRIBUTE,
|
||||
ST_WD_FOR_DYNAMIC_CHUNK,
|
||||
ST_WD_FOR_DYNAMIC_INIT,
|
||||
ST_WD_FOR_STATIC,
|
||||
ST_WD_SECTION,
|
||||
ST_WD_SINGLE,
|
||||
/* Task */
|
||||
ST_TASK_ALLOC,
|
||||
ST_TASK_SCHEDULE,
|
||||
ST_TASKWAIT,
|
||||
ST_TASKYIELD,
|
||||
ST_TASK_CHECK_DEPS,
|
||||
ST_TASK_DUP_ALLOC,
|
||||
ST_CHECK_DEPS,
|
||||
ST_TASKGROUP,
|
||||
ST_TASK_RELEASE_DEPS,
|
||||
ST_TASK_RUN,
|
||||
ST_TASK_RUN_IF0,
|
||||
ST_TASK_SCHEDULE,
|
||||
ST_TASK_TASKGROUP,
|
||||
ST_TASK_TASKWAIT,
|
||||
ST_TASK_TASKWAIT_DEPS,
|
||||
ST_TASK_TASKYIELD,
|
||||
/* Runtime */
|
||||
ST_RT_ATTACHED,
|
||||
ST_RT_FORK_CALL,
|
||||
ST_RT_INIT,
|
||||
ST_RT_MICROTASK_INTERNAL,
|
||||
ST_RT_MICROTASK_USER,
|
||||
ST_RT_WORKER_LOOP,
|
||||
};
|
||||
|
||||
struct openmp_thread {
|
||||
|
@ -26,29 +26,44 @@ static const char model_name[] = "openmp";
|
||||
enum { model_id = 'P' };
|
||||
|
||||
static struct ev_decl model_evlist[] = {
|
||||
PAIR_E("PA[", "PA]", "the attached state")
|
||||
PAIR_B("PBb", "PBB", "plain barrier")
|
||||
PAIR_B("PBj", "PBJ", "join barrier")
|
||||
PAIR_B("PBf", "PBF", "fork barrier")
|
||||
PAIR_B("PBt", "PBT", "tasking barrier")
|
||||
PAIR_B("PBs", "PBS", "spin wait")
|
||||
|
||||
PAIR_E("PBj", "PBJ", "a join barrier")
|
||||
PAIR_E("PBb", "PBB", "a barrier")
|
||||
PAIR_E("PBt", "PBT", "a tasking barrier")
|
||||
PAIR_E("PBs", "PBS", "a spin wait")
|
||||
PAIR_B("PIa", "PIA", "critical acquiring")
|
||||
PAIR_B("PIr", "PIR", "critical releasing")
|
||||
PAIR_B("PI[", "PI]", "critical section")
|
||||
|
||||
PAIR_B("PWs", "PWS", "static for")
|
||||
PAIR_B("PWd", "PWD", "dynamic for init")
|
||||
PAIR_B("PWd", "PWD", "distribute")
|
||||
PAIR_B("PWy", "PWY", "dynamic for init")
|
||||
PAIR_B("PWc", "PWC", "dynamic for chunk")
|
||||
PAIR_B("PWs", "PWS", "static for")
|
||||
PAIR_B("PWe", "PWE", "section")
|
||||
PAIR_B("PWi", "PWI", "single")
|
||||
|
||||
PAIR_B("PTr", "PTR", "releasing task dependencies")
|
||||
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
|
||||
PAIR_B("PT[", "PT]", "invoking a task")
|
||||
PAIR_B("PTi", "PTI", "invoking an if0 task")
|
||||
PAIR_B("PTa", "PTA", "task allocation")
|
||||
PAIR_B("PTs", "PTS", "scheduling a task")
|
||||
PAIR_E("PTt", "PTT", "a taskwait")
|
||||
PAIR_E("PTy", "PTY", "a taskyield")
|
||||
PAIR_B("PTd", "PTD", "duplicating a task")
|
||||
PAIR_B("PTc", "PTC", "checking task dependencies")
|
||||
PAIR_E("PTg", "PTG", "a taskgroup")
|
||||
PAIR_B("PTd", "PTD", "duplicating a task")
|
||||
PAIR_B("PTr", "PTR", "releasing task dependencies")
|
||||
PAIR_B("PT[", "PT]", "running a task")
|
||||
PAIR_B("PTi", "PTI", "running an if0 task")
|
||||
PAIR_B("PTs", "PTS", "scheduling a task")
|
||||
PAIR_B("PTg", "PTG", "a taskgroup")
|
||||
PAIR_B("PTt", "PTT", "a taskwait")
|
||||
PAIR_B("PTw", "PTW", "waiting for taskwait dependencies")
|
||||
PAIR_B("PTy", "PTY", "a taskyield")
|
||||
|
||||
PAIR_E("PA[", "PA]", "the attached state")
|
||||
|
||||
PAIR_B("PMi", "PMI", "microtask internal")
|
||||
PAIR_B("PMu", "PMU", "microtask user code")
|
||||
|
||||
PAIR_B("PH[", "PH]", "worker loop")
|
||||
|
||||
PAIR_B("PCf", "PCF", "fork call")
|
||||
PAIR_B("PCi", "PCI", "initialization")
|
||||
|
||||
{ NULL, NULL },
|
||||
};
|
||||
@ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = 1,
|
||||
};
|
||||
|
||||
static const int chan_dup[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = 1,
|
||||
};
|
||||
|
||||
/* ----------------- pvt ------------------ */
|
||||
|
||||
static const int pvt_type[CH_MAX] = {
|
||||
@ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = {
|
||||
};
|
||||
|
||||
static const struct pcf_value_label openmp_subsystem_values[] = {
|
||||
{ ST_ATTACHED, "Attached" },
|
||||
{ ST_JOIN_BARRIER, "Join barrier" },
|
||||
{ ST_BARRIER, "Barrier" },
|
||||
{ ST_TASKING_BARRIER, "Tasking barrier" },
|
||||
{ ST_SPIN_WAIT, "Spin wait" },
|
||||
{ ST_FOR_STATIC, "For static" },
|
||||
{ ST_FOR_DYNAMIC_INIT, "For dynamic init" },
|
||||
{ ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" },
|
||||
{ ST_SINGLE, "Single" },
|
||||
{ ST_RELEASE_DEPS, "Release deps" },
|
||||
{ ST_TASKWAIT_DEPS, "Taskwait deps" },
|
||||
{ ST_INVOKE_TASK, "Invoke task" },
|
||||
{ ST_INVOKE_TASK_IF0, "Invoke task if0" },
|
||||
{ ST_TASK_ALLOC, "Task alloc" },
|
||||
{ ST_TASK_SCHEDULE, "Task schedule" },
|
||||
{ ST_TASKWAIT, "Taskwait" },
|
||||
{ ST_TASKYIELD, "Taskyield" },
|
||||
{ ST_TASK_DUP_ALLOC, "Task dup alloc" },
|
||||
{ ST_CHECK_DEPS, "Check deps" },
|
||||
{ ST_TASKGROUP, "Taskgroup" },
|
||||
/* Work-distribution */
|
||||
{ ST_WD_DISTRIBUTE, "Work-distribution: Distribute" },
|
||||
{ ST_WD_FOR_DYNAMIC_CHUNK, "Work-distribution: Dynamic for chunk" },
|
||||
{ ST_WD_FOR_DYNAMIC_INIT, "Work-distribution: Dynamic for initialization" },
|
||||
{ ST_WD_FOR_STATIC, "Work-distribution: Static for chunk" },
|
||||
{ ST_WD_SECTION, "Work-distribution: Section" },
|
||||
{ ST_WD_SINGLE, "Work-distribution: Single" },
|
||||
/* Task */
|
||||
{ ST_TASK_ALLOC, "Task: Allocation" },
|
||||
{ ST_TASK_CHECK_DEPS, "Task: Check deps" },
|
||||
{ ST_TASK_DUP_ALLOC, "Task: Duplicating" },
|
||||
{ ST_TASK_RELEASE_DEPS, "Task: Releasing deps" },
|
||||
{ ST_TASK_RUN, "Task: Running task" },
|
||||
{ ST_TASK_RUN_IF0, "Task: Running task if0" },
|
||||
{ ST_TASK_SCHEDULE, "Task: Scheduling" },
|
||||
{ ST_TASK_TASKGROUP, "Task: Taskgroup" },
|
||||
{ ST_TASK_TASKWAIT, "Task: Taskwait" },
|
||||
{ ST_TASK_TASKWAIT_DEPS, "Task: Taskwait deps" },
|
||||
{ ST_TASK_TASKYIELD, "Task: Taskyield" },
|
||||
/* Critical */
|
||||
{ ST_CRITICAL_ACQ, "Critical: Acquiring" },
|
||||
{ ST_CRITICAL_REL, "Critical: Releasing" },
|
||||
{ ST_CRITICAL_SECTION, "Critical: Section" },
|
||||
/* Barrier */
|
||||
{ ST_BARRIER_FORK, "Barrier: Fork" },
|
||||
{ ST_BARRIER_JOIN, "Barrier: Join" },
|
||||
{ ST_BARRIER_PLAIN, "Barrier: Plain" },
|
||||
{ ST_BARRIER_TASK, "Barrier: Task" },
|
||||
{ ST_BARRIER_SPIN_WAIT, "Barrier: Spin wait" },
|
||||
/* Runtime */
|
||||
{ ST_RT_ATTACHED, "Runtime: Attached" },
|
||||
{ ST_RT_FORK_CALL, "Runtime: Fork call" },
|
||||
{ ST_RT_INIT, "Runtime: Initialization" },
|
||||
{ ST_RT_MICROTASK_INTERNAL, "Runtime: Internal microtask" },
|
||||
{ ST_RT_MICROTASK_USER, "Runtime: User microtask" },
|
||||
{ ST_RT_WORKER_LOOP, "Runtime: Worker main loop" },
|
||||
{ -1, NULL },
|
||||
};
|
||||
|
||||
@ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = {
|
||||
};
|
||||
|
||||
static const long prv_flags[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = PRV_SKIPDUP,
|
||||
[CH_SUBSYSTEM] = PRV_EMITDUP,
|
||||
};
|
||||
|
||||
static const struct model_pvt_spec pvt_spec = {
|
||||
@ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = {
|
||||
/* ----------------- tracking ------------------ */
|
||||
|
||||
static const int th_track[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = TRACK_TH_RUN,
|
||||
[CH_SUBSYSTEM] = TRACK_TH_ACT,
|
||||
};
|
||||
|
||||
static const int cpu_track[CH_MAX] = {
|
||||
@ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = {
|
||||
.prefix = model_name,
|
||||
.ch_names = chan_name,
|
||||
.ch_stack = chan_stack,
|
||||
.ch_dup = chan_dup,
|
||||
.pvt = &pvt_spec,
|
||||
.track = th_track,
|
||||
};
|
||||
@ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = {
|
||||
.prefix = model_name,
|
||||
.ch_names = chan_name,
|
||||
.ch_stack = chan_stack,
|
||||
.ch_dup = chan_dup,
|
||||
.pvt = &pvt_spec,
|
||||
.track = cpu_track,
|
||||
};
|
||||
|
@ -4,3 +4,4 @@
|
||||
add_subdirectory(nanos6)
|
||||
add_subdirectory(nodes)
|
||||
add_subdirectory(nosv)
|
||||
add_subdirectory(openmp)
|
||||
|
50
test/rt/openmp/CMakeLists.txt
Normal file
50
test/rt/openmp/CMakeLists.txt
Normal file
@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND)
|
||||
check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND)
|
||||
cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH)
|
||||
|
||||
if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests")
|
||||
else()
|
||||
message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests")
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
function(openmp_rt_test)
|
||||
ovni_test(${ARGN})
|
||||
target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv"
|
||||
"-no-pedantic")
|
||||
target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv")
|
||||
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m")
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "OMP_OVNI=1")
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni")
|
||||
endfunction()
|
||||
|
||||
openmp_rt_test(barrier-explicit.c)
|
||||
openmp_rt_test(critical.c)
|
||||
openmp_rt_test(if0-nested-task.c)
|
||||
openmp_rt_test(if0.c)
|
||||
openmp_rt_test(multi-parallels.c)
|
||||
openmp_rt_test(parallel-for.c)
|
||||
openmp_rt_test(parallel-loop.c)
|
||||
openmp_rt_test(parallel-nested.c)
|
||||
openmp_rt_test(parallel-task.c)
|
||||
openmp_rt_test(sections.c)
|
||||
openmp_rt_test(simple-task.c)
|
||||
openmp_rt_test(task.c)
|
||||
openmp_rt_test(taskloop.c)
|
||||
openmp_rt_test(taskwait.c)
|
||||
openmp_rt_test(team-distribute.c)
|
||||
openmp_rt_test(worksharing-and-tasks.c)
|
||||
openmp_rt_test(worksharing-mix.c)
|
||||
openmp_rt_test(worksharing-task.c)
|
||||
openmp_rt_test(worksharing.c)
|
||||
openmp_rt_test(worksharing01.c)
|
||||
openmp_rt_test(worksharing02.c)
|
||||
openmp_rt_test(worksharing03.c)
|
47
test/rt/openmp/barrier-explicit.c
Normal file
47
test/rt/openmp/barrier-explicit.c
Normal file
@ -0,0 +1,47 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "compat.h"
|
||||
|
||||
#define N 100
|
||||
|
||||
static void
|
||||
dummy_work(double *x, int i)
|
||||
{
|
||||
sleep_us(i);
|
||||
x[i] += sqrt((double) i);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
double x[N] = { 0 };
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp single
|
||||
{
|
||||
for (int i = 0; i < N; i++) {
|
||||
#pragma omp task shared(x)
|
||||
dummy_work(x, i);
|
||||
}
|
||||
}
|
||||
|
||||
sleep_us(200);
|
||||
#pragma omp barrier
|
||||
sleep_us(1000);
|
||||
#pragma omp barrier
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
for (int i = 0; i < N; i++) {
|
||||
#pragma omp task shared(x)
|
||||
dummy_work(x, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < N; i++)
|
||||
sum += x[i];
|
||||
|
||||
printf("sum = %e\n", sum);
|
||||
return 0;
|
||||
}
|
16
test/rt/openmp/critical.c
Normal file
16
test/rt/openmp/critical.c
Normal file
@ -0,0 +1,16 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
sleep_us(1000);
|
||||
|
||||
#pragma omp critical
|
||||
sleep_us(200);
|
||||
|
||||
sleep_us(1000);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
20
test/rt/openmp/if0-nested-task.c
Normal file
20
test/rt/openmp/if0-nested-task.c
Normal file
@ -0,0 +1,20 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task if(0)
|
||||
{
|
||||
#pragma omp task
|
||||
{
|
||||
sleep_us(1000);
|
||||
}
|
||||
#pragma omp taskwait
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
17
test/rt/openmp/if0.c
Normal file
17
test/rt/openmp/if0.c
Normal file
@ -0,0 +1,17 @@
|
||||
#include <omp.h>
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task if(0)
|
||||
{
|
||||
sleep_us(1000);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
13
test/rt/openmp/multi-parallels.c
Normal file
13
test/rt/openmp/multi-parallels.c
Normal file
@ -0,0 +1,13 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
for (int i = 0; i < 10; i++) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
sleep_us(1000);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
29
test/rt/openmp/parallel-for.c
Normal file
29
test/rt/openmp/parallel-for.c
Normal file
@ -0,0 +1,29 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
14
test/rt/openmp/parallel-loop.c
Normal file
14
test/rt/openmp/parallel-loop.c
Normal file
@ -0,0 +1,14 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp loop
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
22
test/rt/openmp/parallel-nested.c
Normal file
22
test/rt/openmp/parallel-nested.c
Normal file
@ -0,0 +1,22 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
26
test/rt/openmp/parallel-task.c
Normal file
26
test/rt/openmp/parallel-task.c
Normal file
@ -0,0 +1,26 @@
|
||||
#include "compat.h"
|
||||
|
||||
static void foo(void)
|
||||
{
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++)
|
||||
sleep_us(i);
|
||||
|
||||
#pragma omp single
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
#pragma omp task
|
||||
sleep_us(10);
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
foo();
|
||||
foo();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
51
test/rt/openmp/sections.c
Normal file
51
test/rt/openmp/sections.c
Normal file
@ -0,0 +1,51 @@
|
||||
#include <stdio.h>
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{ sleep_us(1001); printf("1001\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1002); printf("1002\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1003); printf("1003\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1004); printf("1004\n"); }
|
||||
#pragma omp section
|
||||
sleep_us(1005);
|
||||
#pragma omp section
|
||||
sleep_us(1006);
|
||||
#pragma omp section
|
||||
sleep_us(1007);
|
||||
#pragma omp section
|
||||
sleep_us(1008);
|
||||
#pragma omp section
|
||||
sleep_us(1009);
|
||||
#pragma omp section
|
||||
sleep_us(1010);
|
||||
#pragma omp section
|
||||
sleep_us(1011);
|
||||
#pragma omp section
|
||||
sleep_us(1012);
|
||||
#pragma omp section
|
||||
sleep_us(1013);
|
||||
#pragma omp section
|
||||
sleep_us(1014);
|
||||
#pragma omp section
|
||||
sleep_us(1015);
|
||||
#pragma omp section
|
||||
sleep_us(1016);
|
||||
#pragma omp section
|
||||
sleep_us(1017);
|
||||
#pragma omp section
|
||||
sleep_us(1018);
|
||||
#pragma omp section
|
||||
sleep_us(1019);
|
||||
#pragma omp section
|
||||
sleep_us(1020);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
23
test/rt/openmp/simple-task.c
Normal file
23
test/rt/openmp/simple-task.c
Normal file
@ -0,0 +1,23 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int a;
|
||||
int *p = &a;
|
||||
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task depend(out : p[0])
|
||||
{
|
||||
sleep_us(1000);
|
||||
}
|
||||
for (int i = 0; i < 10000; i++)
|
||||
{
|
||||
#pragma omp task depend(in : p[0])
|
||||
sleep_us(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
19
test/rt/openmp/task.c
Normal file
19
test/rt/openmp/task.c
Normal file
@ -0,0 +1,19 @@
|
||||
#include <stdio.h>
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
for (int i = 0; i < 10; i++) {
|
||||
#pragma omp task
|
||||
{
|
||||
printf("%d\n", i);
|
||||
sleep_us(100);
|
||||
}
|
||||
}
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
17
test/rt/openmp/taskloop.c
Normal file
17
test/rt/openmp/taskloop.c
Normal file
@ -0,0 +1,17 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp taskloop
|
||||
for (int i = 0; i < 10000; i++)
|
||||
{
|
||||
#pragma omp task
|
||||
sleep_us(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
42
test/rt/openmp/taskwait.c
Normal file
42
test/rt/openmp/taskwait.c
Normal file
@ -0,0 +1,42 @@
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task label("A")
|
||||
{
|
||||
sleep_us(5000);
|
||||
printf("A\n");
|
||||
}
|
||||
|
||||
#pragma omp task label("B")
|
||||
{
|
||||
#pragma omp task label("B1")
|
||||
{
|
||||
sleep_us(2000);
|
||||
printf("B1\n");
|
||||
}
|
||||
|
||||
/* Shouldn't wait for task A */
|
||||
#pragma omp taskwait
|
||||
|
||||
#pragma omp task
|
||||
{
|
||||
sleep_us(1000);
|
||||
printf("B2\n");
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp task label("C")
|
||||
{
|
||||
printf("C\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Expected output C B1 B2 A */
|
||||
|
||||
return 0;
|
||||
}
|
14
test/rt/openmp/team-distribute.c
Normal file
14
test/rt/openmp/team-distribute.c
Normal file
@ -0,0 +1,14 @@
|
||||
#include <omp.h>
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp teams num_teams(2)
|
||||
{
|
||||
#pragma omp distribute parallel for
|
||||
for (volatile int i = 0; i < 1000; i++)
|
||||
sleep_us(100 + i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
34
test/rt/openmp/worksharing-and-tasks.c
Normal file
34
test/rt/openmp/worksharing-and-tasks.c
Normal file
@ -0,0 +1,34 @@
|
||||
#include <stdio.h>
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
//#pragma omp single nowait
|
||||
for (int i = 0; i < 100; i++) {
|
||||
#pragma omp task
|
||||
sleep_us(10);
|
||||
}
|
||||
|
||||
/* Wait a bit for task allocation */
|
||||
sleep_us(1000);
|
||||
|
||||
/* Occupy 4 CPUs with sections */
|
||||
#pragma omp sections nowait
|
||||
{
|
||||
#pragma omp section
|
||||
{ sleep_us(1001); printf("1001\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1002); printf("1002\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1003); printf("1003\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(1004); printf("1004\n"); }
|
||||
}
|
||||
|
||||
#pragma omp taskwait
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
66
test/rt/openmp/worksharing-mix.c
Normal file
66
test/rt/openmp/worksharing-mix.c
Normal file
@ -0,0 +1,66 @@
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include "compat.h"
|
||||
|
||||
/* Test several work-distribution and task constructs, so we can generate a
|
||||
* trace that includes most of the states. */
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
|
||||
#pragma omp sections
|
||||
{
|
||||
#pragma omp section
|
||||
{ sleep_us(101); printf("101\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(102); printf("102\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(103); printf("103\n"); }
|
||||
#pragma omp section
|
||||
{ sleep_us(104); printf("104\n"); }
|
||||
}
|
||||
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
|
||||
#pragma omp single
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
#pragma omp task
|
||||
sleep_us(10);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp critical
|
||||
sleep_us(20);
|
||||
|
||||
#pragma omp barrier
|
||||
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Crashes OpenMP-V runtime
|
||||
//#pragma omp distribute parallel for
|
||||
//for (int i = 0; i < 1000; i++) {
|
||||
// sleep_us(1);
|
||||
//}
|
||||
|
||||
return 0;
|
||||
}
|
25
test/rt/openmp/worksharing-task.c
Normal file
25
test/rt/openmp/worksharing-task.c
Normal file
@ -0,0 +1,25 @@
|
||||
#include <omp.h>
|
||||
#include "compat.h"
|
||||
|
||||
static void foo(void)
|
||||
{
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; ++i)
|
||||
{
|
||||
#pragma omp task
|
||||
{
|
||||
sleep_us(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
foo();
|
||||
foo();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
16
test/rt/openmp/worksharing.c
Normal file
16
test/rt/openmp/worksharing.c
Normal file
@ -0,0 +1,16 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for schedule(dynamic) ordered label("omp for dynamic")
|
||||
for (int i = 0; i < 100; i++)
|
||||
sleep_us(100);
|
||||
|
||||
#pragma omp single label("single")
|
||||
sleep_us(1000);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
11
test/rt/openmp/worksharing01.c
Normal file
11
test/rt/openmp/worksharing01.c
Normal file
@ -0,0 +1,11 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp for schedule(static, 30)
|
||||
for (int i = 0; i < 100; i++)
|
||||
sleep_us(10);
|
||||
|
||||
return 0;
|
||||
}
|
12
test/rt/openmp/worksharing02.c
Normal file
12
test/rt/openmp/worksharing02.c
Normal file
@ -0,0 +1,12 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(1)
|
||||
#pragma omp distribute dist_schedule(static, 30)
|
||||
for (int i = 0; i < 100; i++)
|
||||
sleep_us(10);
|
||||
|
||||
return 0;
|
||||
}
|
11
test/rt/openmp/worksharing03.c
Normal file
11
test/rt/openmp/worksharing03.c
Normal file
@ -0,0 +1,11 @@
|
||||
#include "compat.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp for schedule(dynamic)
|
||||
for (int i = 0; i < 100; i++)
|
||||
sleep_us(10);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user