diff --git a/CHANGELOG.md b/CHANGELOG.md index 97e3d24..b225e58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems + and only works with the OpenMP-V runtime, on top of nOS-V). + ### Changed - Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}. diff --git a/cfg/cpu/openmp/subsystem.cfg b/cfg/cpu/openmp/subsystem.cfg index 21803d4..df3ae5c 100644 --- a/cfg/cpu/openmp/subsystem.cfg +++ b/cfg/cpu/openmp/subsystem.cfg @@ -16,6 +16,8 @@ window_height 150 window_comm_lines_enabled true window_flags_enabled false window_noncolor_mode true +window_custom_color_enabled true +window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126} window_logical_filtered true window_physical_filtered false window_comm_fromto true diff --git a/cfg/thread/openmp/subsystem.cfg b/cfg/thread/openmp/subsystem.cfg index 849ac61..917d077 100644 --- a/cfg/thread/openmp/subsystem.cfg +++ b/cfg/thread/openmp/subsystem.cfg @@ -4,9 +4,9 @@ ConfigFile.NumWindows: 1 ################################################################################ -< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread > +< NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread > ################################################################################ -window_name Thread: OpenMP subsystem of the RUNNING thread +window_name Thread: OpenMP subsystem of the ACTIVE thread window_type single window_id 1 window_position_x 0 @@ -16,6 +16,8 @@ window_height 150 window_comm_lines_enabled true window_flags_enabled false window_noncolor_mode true +window_custom_color_enabled true +window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126} window_logical_filtered true window_physical_filtered false window_comm_fromto true @@ -38,5 +40,5 @@ window_labels_to_draw 1 window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } window_filter_module evt_type 1 50 -window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread" +window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread" diff --git a/doc/user/emulation/events.md b/doc/user/emulation/events.md index c2521d0..02f0456 100644 --- a/doc/user/emulation/events.md +++ b/doc/user/emulation/events.md @@ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`: List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
-
PA[
-
enters the attached state
-
PA]
-
leaves the attached state
-
PBj
-
enters a join barrier
-
PBJ
-
leaves a join barrier
PBb
-
enters a barrier
+
begins plain barrier
PBB
-
leaves a barrier
+
ceases plain barrier
+
PBj
+
begins join barrier
+
PBJ
+
ceases join barrier
+
PBf
+
begins fork barrier
+
PBF
+
ceases fork barrier
PBt
-
enters a tasking barrier
+
begins tasking barrier
PBT
-
leaves a tasking barrier
+
ceases tasking barrier
PBs
-
enters a spin wait
+
begins spin wait
PBS
-
leaves a spin wait
-
PWs
-
begins static for
-
PWS
-
ceases static for
+
ceases spin wait
+
PIa
+
begins critical acquiring
+
PIA
+
ceases critical acquiring
+
PIr
+
begins critical releasing
+
PIR
+
ceases critical releasing
+
PI[
+
begins critical section
+
PI]
+
ceases critical section
PWd
-
begins dynamic for init
+
begins distribute
PWD
+
ceases distribute
+
PWy
+
begins dynamic for init
+
PWY
ceases dynamic for init
PWc
begins dynamic for chunk
PWC
ceases dynamic for chunk
+
PWs
+
begins static for
+
PWS
+
ceases static for
+
PWe
+
begins section
+
PWE
+
ceases section
PWi
begins single
PWI
ceases single
-
PTr
-
begins releasing task dependencies
-
PTR
-
ceases releasing task dependencies
-
PTw
-
begins waiting for taskwait dependencies
-
PTW
-
ceases waiting for taskwait dependencies
-
PT[
-
begins invoking a task
-
PT]
-
ceases invoking a task
-
PTi
-
begins invoking an if0 task
-
PTI
-
ceases invoking an if0 task
PTa
begins task allocation
PTA
ceases task allocation
-
PTs
-
begins scheduling a task
-
PTS
-
ceases scheduling a task
-
PTt
-
enters a taskwait
-
PTT
-
leaves a taskwait
-
PTy
-
enters a taskyield
-
PTY
-
leaves a taskyield
-
PTd
-
begins duplicating a task
-
PTD
-
ceases duplicating a task
PTc
begins checking task dependencies
PTC
ceases checking task dependencies
+
PTd
+
begins duplicating a task
+
PTD
+
ceases duplicating a task
+
PTr
+
begins releasing task dependencies
+
PTR
+
ceases releasing task dependencies
+
PT[
+
begins running a task
+
PT]
+
ceases running a task
+
PTi
+
begins running an if0 task
+
PTI
+
ceases running an if0 task
+
PTs
+
begins scheduling a task
+
PTS
+
ceases scheduling a task
PTg
-
enters a taskgroup
+
begins a taskgroup
PTG
-
leaves a taskgroup
+
ceases a taskgroup
+
PTt
+
begins a taskwait
+
PTT
+
ceases a taskwait
+
PTw
+
begins waiting for taskwait dependencies
+
PTW
+
ceases waiting for taskwait dependencies
+
PTy
+
begins a taskyield
+
PTY
+
ceases a taskyield
+
PA[
+
enters the attached state
+
PA]
+
leaves the attached state
+
PMi
+
begins microtask internal
+
PMI
+
ceases microtask internal
+
PMu
+
begins microtask user code
+
PMU
+
ceases microtask user code
+
PH[
+
begins worker loop
+
PH]
+
ceases worker loop
+
PCf
+
begins fork call
+
PCF
+
ceases fork call
+
PCi
+
begins initialization
+
PCI
+
ceases initialization
## Model tampi diff --git a/doc/user/emulation/fig/openmp-subsystem.png b/doc/user/emulation/fig/openmp-subsystem.png new file mode 100644 index 0000000..abfc71b Binary files /dev/null and b/doc/user/emulation/fig/openmp-subsystem.png differ diff --git a/doc/user/emulation/openmp.md b/doc/user/emulation/openmp.md index 4806c3b..d176eb7 100644 --- a/doc/user/emulation/openmp.md +++ b/doc/user/emulation/openmp.md @@ -1,164 +1,243 @@ -# OpenMP Model +# OpenMP model -The LLVM OpenMP Runtime is an integral component of the LLVM compiler -infrastructure that provides support for the OpenMP (Open Multi-Processing) -programming model. +The [OpenMP programming model](https://www.openmp.org) is a widely used API and +set of directives for parallel programming, allowing developers to write +multi-threaded and multi-process applications more easily. In this document we +refer to the +[version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf). -OpenMP is a widely used API and set of directives for parallel programming, -allowing developers to write multi-threaded and multi-process applications more -easily. +The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides +an implementation of the OpenMP specification as a component of the LLVM +compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top +of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the +[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**. -This documentation is about an OpenMP runtime built on top of [nOS-V][nosv], -leveraging its thread management capabilities while retaining the fundamental -characteristics of the original runtime. +We have added instrumentation events to OpenMP-V designed to be enabled along +the [nOS-V instrumentation](nosv.md). This document describes all the +instrumentation features included in our modified OpenMP-V runtime to identify +what is happening. This data is useful for both users and developers of the +OpenMP runtime to analyze issues and undesired behaviors. -While the modifications introduced to the runtime may appear to be minor, it's -important to note that this enhanced version is not API compatible with the -original runtime. As a result, it is mandatory to use the clang built in the same -[LLVM Project][llvm]. +!!! Note -This document describes all the instrumentation features included in the runtime -by both nOS-V and OpenMP to monitor task execution and the execution flow within -the runtime library to identify what is happening. This data is useful for both -users and developers of the OpenMP runtime to analyze issues and undesired -behaviors. + Instrumenting the original OpenMP runtime from the LLVM project is planned + but is not yet posible. For now you must use the modified OpenMP-V runtime + with nOS-V. -[llvm]: https://pm.bsc.es/llvm-ompss -[nosv]: https://gitlab.bsc.es/nos-v/nos-v +## Enable the instrumentation -## How to Generate Execution Traces +To generate runtime traces, you will have to: -In order to build the OpenMP runtime nOS-V must be provided by using -`PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a -runtime without instrumentation. However, the user may be able to generate -execution traces by enabling nOS-V instrumentation through -`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a -nOS-V installation built with ovni. +1. **Build nOS-V with ovni support:** Refer to the + [nOS-V + documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md). + Typically you should use the `--with-ovni` option at configure time to specify + where ovni is installed. +2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH` + environment variable to specify the nOS-V and ovni installation + when configuring CMake. +3. **Enable the instrumentation in nOS-V at runtime:** Refer to the + [nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md) + to find out how to enable the tracing at runtime. Typically you can just set + `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. +4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment + variable `OMP_OVNI=1`. -Building OpenMP with instrumentation requires to pass ovni pkg-config path to -`PKG_CONFIG_PATH` with a nosv installation compiled with ovni too. The reason is -because OpenMP is dependent of nOS-V to generate complete execution traces. +Currently there is only support for the subsystem view, which is documented +below. The view is complemented with the information of [nOS-V views](nosv.md), +as OpenMP-V uses nOS-V tasks to run the workers. -By default, OpenMP will not instrument anything. To enable instrumentation the -user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. +## Subsystem view -The following sections will describe the OpenMP execution trace views and what -information is shown there. - -## nOS-V Task Type - -As said in the previous sections. This OpenMP runtime is built on top of nOS-V. -So the user can explore what does the execution do there. Here we only describe -the task type view. For other views please take a look at the nOS-V chapter. - -In OpenMP, every thread that is launched (main thread included) is shown in a task -type with label "openmp". In a task application, every task call will be seen with -a task type with label "file:line:col" format referring to the pragma location. This -can be changed by using the clause label(string-literal). - -OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for -more information. Nevertheless, the OpenMP task view shows it. - -## OpenMP Subsystem +![Subsystem view example](fig/openmp-subsystem.png) This view illustrates the activities of each thread with different states: -- **Attached**: The thread is attached. +- **Work-distribution subsystem**: Related to work-distribution constructs, + [in Chapter 11][workdis]. -- **Join barrier**: The thread is in the implicit barrier of the parallel region. + - **Distribute**: Running a *Distribute* region. -- **Tasking barrier**: The thread is in the additional tasking barrier trying to - execute tasks. This event happens if executed with KMP_TASKING=1. + - **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often + involve running more than one iteration of the loop. See the + [limitations](#dynamic_for) below. -- **Spin wait**: The thread spin waits for a condition. Usually this event happens - in a barrier while waiting for the other threads to reach the barrier. The thread - also tries to execute tasks. + - **Dynamic for initialization**: Preparing a dynamic *for*. -- **For static**: Executing a for static. The length of the event represents all the - chunks of iterations executed by the thread. See "Limitations" section. + - **Static for chunk**: Executing the assigned iterations of an static + *for*. -- **For dynamic init**: Running the initialization of an OpenMP for dynamic. + - **Single**: Running a *Single* region. All threads of the parallel region + participate. -- **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To - clarify. If a thread executes two chunks of iterations, let's say from 1 to 4 and - from 8 to 12, two different events will be shown. See "Limitations" section. + - **Section**: Running a *Section* region. All threads of the parallel region + participate. -- **Single**: Running a Single region. All threads of the parallel region will emit - the event. +- **Task subsystem**: Related to tasking constructs, [in Chapter 12][tasking]. -- **Release deps**: When finishing a task, trying to release dependencies. This - event happens although the task has no dependencies. + - **Allocation**: Allocating the task descriptor. -- **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled. - This appears typically in a task if0 with dependencies or a taskwait with deps. + - **Check deps**: Checking if the task has pending dependencies to be + fulfilled. When all dependencies are fulfilled the task will be scheduled. -- **Invoke task**: Executing a task. + - **Duplicating**: Duplicating the task descriptor in a taskloop. -- **Invoke task if0**: Executing a task if0. + - **Releasing deps**: Releasing dependencies at the end of a task. This + state is always present even if the task has no dependencies. -- **Task alloc**: Allocating the task descriptor. + - **Running task**: Executing a task. -- **Task schedule**: Adding the task to the scheduler. + - **Running task if0**: Executing a task if0. -- **Taskwait**: Running a taskwait. + - **Scheduling**: Adding the task to the scheduler for execution. -- **Taskyield**: Running a taskyield. + - **Taskgroup**: Waiting in a *taskgroup* construct. -- **Task dup alloc**: Duplicating the task descriptor in a taskloop. + - **Taskwait**: Waiting in a *taskwait* construct. -- **Check deps**: Checking if the task has pending dependencies to be fulfilled. This - means that if all dependencies are fulfilled the task will be scheduled. + - **Taskwait deps**: Trying to execute tasks until dependencies have been + fulfilled. This appears typically in a task if0 with dependencies or a + taskwait with deps. + + - **Taskyield**: Performing a *taskyield* construct. -- **Taskgroup**: Running a taskgroup. +- **Critical subsystem**: Related to the *critical* Constuct, in [Section 15.2][critical]. + + - **Acquiring**: Waiting to acquire a *Critical* section. + + - **Section**: Running the *Critical* section. + + - **Releasing**: Waiting to release a *Critical* section. + +- **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier]. + **All barriers can try to execute tasks**. + + - **Barrier: Fork**: Workers wait for a release signal from the master thread to + continue. The master can continue as soon as it signals the workers. It is + done at the beginning of a fork-join region. + + - **Barrier: Join**: The master thread waits until all workers finish their work. + Workers can continue as soon as they signal the master. It is done at the + end of a fork-join region. + + - **Barrier: Plain**: Performing a plain barrier, which waits for a release + signal from the master thread to continue. It is done at the beginning of + a fork-join region, in the `__kmp_join_barrier()` function. + + - **Barrier: Task**: Blocked in an additional tasking barrier *until all previous + tasks have been executed*. Only happens when executed with `KMP_TASKING=1`. + +- **Runtime subsystem**: Internal operations of the runtime. + + - **Attached**: Present after the call to `nosv_attach()` and before + `nosv_detach()`. This state is a hack. + + - **Fork call**: Preparing a parallel section using the fork-join model. + Only called from the master thread. + + - **Init**: Initializing the OpenMP-V runtime. + + - **Internal microtask**: Running a internal OpenMP-V function as a microtask. + + - **User microtask**: Running user code as a microtask in a worker thread. + + - **Worker main Loop**: Running the main loop, where the workers run the + fork barrier, run a microtask and perform a join barrier until there is no + more work. + +!!! Note + + The generated HTML version of the OpenMP 5.2 specification has some parts + missing, so we link directly to the PDF file which may not work in some + browsers. + +[workdis]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11 +[tasking]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12 +[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2 +[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3 ## Limitations -By the way how OpenMP is implemented. There are some instrumentation points that -violate ovni subsystem rules. This mostly happens because some directives are lowered -partially in the transformed user code, so it is not easy to wrap them into a -Single-entry single-exit (SESE) region, like we would do with a regular task invocation, -for example. +As the compiler generates the code that perform the calls to the OpenMP-V +runtime, there are some parts of the execution that are complicated to +instrument by just placing a pair of events to delimite a function. -All problematic directives are described here so the user is able to understand what -is being show in the traces +For those cases we use an approximation which is documented in the following +subsections. -- **Task if0**: The lowered user code of a task if0 is: - ... = __kmpc_omp_task_alloc(...); - __kmpc_omp_taskwait_deps_51(...); // If task has dependencies - __kmpc_omp_task_begin_if0(...); - // Call to the user code - omp_task_entry_(...); - __kmpc_omp_task_complete_if0(...); +### Dynamic for - Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As - this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0` - and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent. +The generated code of a *dynamic for* has the following structure: -- **For static**: The lowered user code of a for static is: - // Parallel code - __kmpc_for_static_init_4(...); - for ( i = ...; i <= ...; ++i ) - ; - __kmpc_for_static_fini(...); +```c +__kmpc_dispatch_init_4(...); +while (__kmpc_dispatch_next_4(...)) { + for (i = ...; i <= ...; i++) { + // User code ... + } +} +``` - Ideally, the for loop should be called by the runtime to ensure the SESE structure. As - this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4` - and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent. +The function `__kmpc_dispatch_next_4()` returns `true` if there are more +chunks (group of iterations) to be executed by the thread, otherwise it returns +`false`. -- **For dynamic**: The lowered user code of a for dynamic is: +Ideally we want to instrument each chunk with a pair of begin and end events. - __kmpc_dispatch_init_4(...); - while ( __kmpc_dispatch_next_4(...)) - { - for ( i = ...; i <= ...; ++i ) - ; - } +The problem with the instrumentation is that there is no easy way of determining +if the call to `__kmpc_dispatch_next_4()` is processing the first chunk, just +after `__kmpc_dispatch_init_4()`, or is coming from other chunks due to the +while loop. - Ideally, the for loop should be called by the runtime to ensure the SESE structure. As - this code is generated by the compiler the subsystem view shows: - 1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init** - 2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`. - with the event **For dynamic chunk**. - 3. How long it takes to run a loop iteration chunk between the last and the previous - `__kmpc_dispatch_next_4` call with the event **For dynamic chunk**. +Therefore, from the `__kmpc_dispatch_next_4()` alone, we cannot determine if we +need to only emit a single "begin a new chunk" event or we need to emit the pair +of events "finish the last chunk" and "begin a new one". +So, as a workaround, we emit an event from the end of `__kmpc_dispatch_init_4()` +starting a new chunk (which is fake), and then from `__kmpc_dispatch_next_4()` we +always emit the "finish the last chunk" and "begin a new one" events (unless +there are no more chunks, in which case we don't emit the "begin a new one" +event). + +This will cause an spurious *Work-distribution: Dynamic for chunk* state at the +beginning of each dynamic for, which should be very short and is not really a +chunk. + +### Static for + +The generated code of an *static for* has the following structure: + +```c +__kmpc_for_static_init_4(...); +for (i = ...; i <= ...; i++) { + // User code ... +} +__kmpc_for_static_fini(...); +``` + +As this code is generated by the compiler we cannot easily add the begin/end +pair of events to mark the *Work-distribution: Static for chunk* state. + +We assume that by placing the "begin processing a chunk" event at the end of +`__kmpc_for_static_init_4()` and the "end processing the chunk" event at +the beginning of `__kmpc_for_static_fini()` is equivalent to adding the +events surrounding the for loop. + +### Task if0 + +The generated code of an *if0 task* has the following structure: + +```c +... = __kmpc_omp_task_alloc(...); +__kmpc_omp_taskwait_deps_51(...); // If task has dependencies +__kmpc_omp_task_begin_if0(...); +// Call to the user code +omp_task_entry_(...); +__kmpc_omp_task_complete_if0(...); +``` + +Instead of injecting the begin and end events in the user code, we +approximate it by placing the "begin if0 task" event at the end of the +`__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the +beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as +*Task: Running task if0*. diff --git a/flake.lock b/flake.lock index 8f50bc5..92bc150 100644 --- a/flake.lock +++ b/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1701968480, - "narHash": "sha256-YoKN8FZllNQfpEpMqGOBv77kp9J0mlVRlhixWbcDqWg=", + "lastModified": 1705310446, + "narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=", "ref": "refs/heads/master", - "rev": "c4d5135fde108401417fdcdf5e1c8d11aeca4f32", - "revCount": 931, + "rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f", + "revCount": 932, "type": "git", "url": "https://git.sr.ht/~rodarima/bscpkgs" }, diff --git a/flake.nix b/flake.nix index 8a56f28..1df4c69 100644 --- a/flake.nix +++ b/flake.nix @@ -7,11 +7,15 @@ outputs = { self, nixpkgs, bscpkgs }: let + # Set to true to replace all libovni in all runtimes with the current + # source. Causes large rebuilds on changes of ovni. + useLocalOvni = false; + ovniOverlay = final: prev: { nosv = prev.nosv.override { useGit = true; gitBranch = "master"; - gitCommit = "6a63fd4378ba458243dda3159500c1450edf0e82"; + gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b"; }; nanos6 = prev.nanos6.override { useGit = true; @@ -23,13 +27,27 @@ gitBranch = "master"; gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f"; }; + clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override { + useGit = true; + gitBranch = "master"; + gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886"; + }; + + # Use a fixed commit for libovni + ovniFixed = prev.ovni.override { + useGit = true; + gitBranch = "master"; + gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e"; + }; # Build with the current source - ovni = prev.ovni.overrideAttrs (old: rec { + ovniLocal = prev.ovni.overrideAttrs (old: rec { pname = "ovni-local"; version = if self ? shortRev then self.shortRev else "dirty"; src = self; cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ]; }); + # Select correct ovni for libovni + ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed; }; pkgs = import nixpkgs { system = "x86_64-linux"; @@ -51,12 +69,12 @@ ]; lib = pkgs.lib; in { - packages.x86_64-linux.ovniPackages = rec { + packages.x86_64-linux.ovniPackages = { + # Allow inspection of packages from the command line + inherit pkgs; + } // rec { # Build with the current source - local = pkgs.ovni.overrideAttrs (old: { - pname = "ovni-local"; - src = self; - }); + local = pkgs.ovniLocal; # Build in Debug mode debug = local.overrideAttrs (old: { @@ -97,12 +115,13 @@ # We need to be able to exit the chroot to run Nanos6 tests, as they # require access to /sys for hwloc __noChroot = true; - buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes ]); + buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]); cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ]; preConfigure = old.preConfigure or "" + '' export NOSV_HOME="${pkgs.nosv}" export NODES_HOME="${pkgs.nodes}" export NANOS6_HOME="${pkgs.nanos6}" + export OPENMP_RUNTIME="libompv" ''; }); diff --git a/mkdocs.yml b/mkdocs.yml index 4319916..ed93df2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,7 @@ nav: - user/emulation/nanos6.md - user/emulation/tampi.md - user/emulation/mpi.md + - user/emulation/openmp.md - user/emulation/events.md - CHANGELOG.md - 'Developer guide': diff --git a/src/emu/openmp/event.c b/src/emu/openmp/event.c index 1250041..445db7c 100644 --- a/src/emu/openmp/event.c +++ b/src/emu/openmp/event.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) +/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ #include "openmp_priv.h" @@ -14,53 +14,83 @@ enum { PUSH = 1, POP = 2, IGN = 3 }; static const int fn_table[256][256][3] = { - ['A'] = { - ['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED }, - [']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED }, - }, ['B'] = { - ['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER }, - ['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER }, - ['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER }, - ['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER }, - ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER }, - ['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER }, - ['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT }, - ['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT }, + ['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN }, + ['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER_PLAIN }, + ['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN }, + ['J'] = { CH_SUBSYSTEM, POP, ST_BARRIER_JOIN }, + ['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK }, + ['F'] = { CH_SUBSYSTEM, POP, ST_BARRIER_FORK }, + ['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK }, + ['T'] = { CH_SUBSYSTEM, POP, ST_BARRIER_TASK }, + ['s'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT }, + ['S'] = { CH_SUBSYSTEM, IGN, ST_BARRIER_SPIN_WAIT }, + }, + ['I'] = { + ['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ }, + ['A'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_ACQ }, + ['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL }, + ['R'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_REL }, + ['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION }, + [']'] = { CH_SUBSYSTEM, POP, ST_CRITICAL_SECTION }, }, ['W'] = { - ['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC }, - ['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC }, - ['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT }, - ['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT }, - ['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK }, - ['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK }, - ['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE }, - ['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE }, + ['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE }, + ['D'] = { CH_SUBSYSTEM, POP, ST_WD_DISTRIBUTE }, + ['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK }, + ['C'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_CHUNK }, + ['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT }, + ['Y'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_DYNAMIC_INIT }, + ['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC }, + ['S'] = { CH_SUBSYSTEM, POP, ST_WD_FOR_STATIC }, + ['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION }, + ['E'] = { CH_SUBSYSTEM, POP, ST_WD_SECTION }, + ['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE }, + ['I'] = { CH_SUBSYSTEM, POP, ST_WD_SINGLE }, }, ['T'] = { - ['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS }, - ['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS }, - ['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS }, - ['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS }, - ['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK }, - [']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK }, - ['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 }, - ['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 }, ['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC }, - ['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC }, - ['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, - ['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE }, - ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT }, - ['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT }, - ['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD }, - ['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD }, + ['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC }, + ['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS }, + ['C'] = { CH_SUBSYSTEM, POP, ST_TASK_CHECK_DEPS }, ['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC }, - ['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC }, - ['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS }, - ['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS }, - ['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP }, - ['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP }, + ['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC }, + ['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS }, + ['R'] = { CH_SUBSYSTEM, POP, ST_TASK_RELEASE_DEPS }, + ['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN }, + [']'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN }, + ['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 }, + ['I'] = { CH_SUBSYSTEM, POP, ST_TASK_RUN_IF0 }, + ['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, + ['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE }, + ['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP }, + ['G'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKGROUP }, + ['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT }, + ['T'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT }, + ['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS }, + ['W'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKWAIT_DEPS }, + ['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD }, + ['Y'] = { CH_SUBSYSTEM, POP, ST_TASK_TASKYIELD }, + }, + ['A'] = { + ['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED }, + [']'] = { CH_SUBSYSTEM, POP, ST_RT_ATTACHED }, + }, + ['M'] = { + ['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL }, + ['I'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_INTERNAL }, + ['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER }, + ['U'] = { CH_SUBSYSTEM, POP, ST_RT_MICROTASK_USER }, + }, + ['H'] = { + ['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP }, + [']'] = { CH_SUBSYSTEM, POP, ST_RT_WORKER_LOOP }, + }, + ['C'] = { + ['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT }, + ['I'] = { CH_SUBSYSTEM, POP, ST_RT_INIT }, + ['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL }, + ['F'] = { CH_SUBSYSTEM, POP, ST_RT_FORK_CALL }, }, }; diff --git a/src/emu/openmp/openmp_priv.h b/src/emu/openmp/openmp_priv.h index a5ee105..2893b32 100644 --- a/src/emu/openmp/openmp_priv.h +++ b/src/emu/openmp/openmp_priv.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) +/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ #ifndef OPENMP_PRIV_H @@ -15,28 +15,42 @@ enum openmp_chan { CH_MAX, }; - enum openmp_function_values { - ST_ATTACHED = 1, - ST_JOIN_BARRIER, - ST_BARRIER, - ST_TASKING_BARRIER, - ST_SPIN_WAIT, - ST_FOR_STATIC, - ST_FOR_DYNAMIC_INIT, - ST_FOR_DYNAMIC_CHUNK, - ST_SINGLE, - ST_RELEASE_DEPS, - ST_TASKWAIT_DEPS, - ST_INVOKE_TASK, - ST_INVOKE_TASK_IF0, + ST_BARRIER_FORK = 1, + ST_BARRIER_JOIN, + ST_BARRIER_PLAIN, + ST_BARRIER_SPIN_WAIT, + ST_BARRIER_TASK, + /* Critical */ + ST_CRITICAL_ACQ, + ST_CRITICAL_REL, + ST_CRITICAL_SECTION, + /* Work-distribution */ + ST_WD_DISTRIBUTE, + ST_WD_FOR_DYNAMIC_CHUNK, + ST_WD_FOR_DYNAMIC_INIT, + ST_WD_FOR_STATIC, + ST_WD_SECTION, + ST_WD_SINGLE, + /* Task */ ST_TASK_ALLOC, - ST_TASK_SCHEDULE, - ST_TASKWAIT, - ST_TASKYIELD, + ST_TASK_CHECK_DEPS, ST_TASK_DUP_ALLOC, - ST_CHECK_DEPS, - ST_TASKGROUP, + ST_TASK_RELEASE_DEPS, + ST_TASK_RUN, + ST_TASK_RUN_IF0, + ST_TASK_SCHEDULE, + ST_TASK_TASKGROUP, + ST_TASK_TASKWAIT, + ST_TASK_TASKWAIT_DEPS, + ST_TASK_TASKYIELD, + /* Runtime */ + ST_RT_ATTACHED, + ST_RT_FORK_CALL, + ST_RT_INIT, + ST_RT_MICROTASK_INTERNAL, + ST_RT_MICROTASK_USER, + ST_RT_WORKER_LOOP, }; struct openmp_thread { diff --git a/src/emu/openmp/setup.c b/src/emu/openmp/setup.c index ddad033..4af1610 100644 --- a/src/emu/openmp/setup.c +++ b/src/emu/openmp/setup.c @@ -26,29 +26,44 @@ static const char model_name[] = "openmp"; enum { model_id = 'P' }; static struct ev_decl model_evlist[] = { - PAIR_E("PA[", "PA]", "the attached state") + PAIR_B("PBb", "PBB", "plain barrier") + PAIR_B("PBj", "PBJ", "join barrier") + PAIR_B("PBf", "PBF", "fork barrier") + PAIR_B("PBt", "PBT", "tasking barrier") + PAIR_B("PBs", "PBS", "spin wait") - PAIR_E("PBj", "PBJ", "a join barrier") - PAIR_E("PBb", "PBB", "a barrier") - PAIR_E("PBt", "PBT", "a tasking barrier") - PAIR_E("PBs", "PBS", "a spin wait") + PAIR_B("PIa", "PIA", "critical acquiring") + PAIR_B("PIr", "PIR", "critical releasing") + PAIR_B("PI[", "PI]", "critical section") - PAIR_B("PWs", "PWS", "static for") - PAIR_B("PWd", "PWD", "dynamic for init") + PAIR_B("PWd", "PWD", "distribute") + PAIR_B("PWy", "PWY", "dynamic for init") PAIR_B("PWc", "PWC", "dynamic for chunk") + PAIR_B("PWs", "PWS", "static for") + PAIR_B("PWe", "PWE", "section") PAIR_B("PWi", "PWI", "single") - PAIR_B("PTr", "PTR", "releasing task dependencies") - PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") - PAIR_B("PT[", "PT]", "invoking a task") - PAIR_B("PTi", "PTI", "invoking an if0 task") PAIR_B("PTa", "PTA", "task allocation") - PAIR_B("PTs", "PTS", "scheduling a task") - PAIR_E("PTt", "PTT", "a taskwait") - PAIR_E("PTy", "PTY", "a taskyield") - PAIR_B("PTd", "PTD", "duplicating a task") PAIR_B("PTc", "PTC", "checking task dependencies") - PAIR_E("PTg", "PTG", "a taskgroup") + PAIR_B("PTd", "PTD", "duplicating a task") + PAIR_B("PTr", "PTR", "releasing task dependencies") + PAIR_B("PT[", "PT]", "running a task") + PAIR_B("PTi", "PTI", "running an if0 task") + PAIR_B("PTs", "PTS", "scheduling a task") + PAIR_B("PTg", "PTG", "a taskgroup") + PAIR_B("PTt", "PTT", "a taskwait") + PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") + PAIR_B("PTy", "PTY", "a taskyield") + + PAIR_E("PA[", "PA]", "the attached state") + + PAIR_B("PMi", "PMI", "microtask internal") + PAIR_B("PMu", "PMU", "microtask user code") + + PAIR_B("PH[", "PH]", "worker loop") + + PAIR_B("PCf", "PCF", "fork call") + PAIR_B("PCi", "PCI", "initialization") { NULL, NULL }, }; @@ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = { [CH_SUBSYSTEM] = 1, }; +static const int chan_dup[CH_MAX] = { + [CH_SUBSYSTEM] = 1, +}; + /* ----------------- pvt ------------------ */ static const int pvt_type[CH_MAX] = { @@ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = { }; static const struct pcf_value_label openmp_subsystem_values[] = { - { ST_ATTACHED, "Attached" }, - { ST_JOIN_BARRIER, "Join barrier" }, - { ST_BARRIER, "Barrier" }, - { ST_TASKING_BARRIER, "Tasking barrier" }, - { ST_SPIN_WAIT, "Spin wait" }, - { ST_FOR_STATIC, "For static" }, - { ST_FOR_DYNAMIC_INIT, "For dynamic init" }, - { ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" }, - { ST_SINGLE, "Single" }, - { ST_RELEASE_DEPS, "Release deps" }, - { ST_TASKWAIT_DEPS, "Taskwait deps" }, - { ST_INVOKE_TASK, "Invoke task" }, - { ST_INVOKE_TASK_IF0, "Invoke task if0" }, - { ST_TASK_ALLOC, "Task alloc" }, - { ST_TASK_SCHEDULE, "Task schedule" }, - { ST_TASKWAIT, "Taskwait" }, - { ST_TASKYIELD, "Taskyield" }, - { ST_TASK_DUP_ALLOC, "Task dup alloc" }, - { ST_CHECK_DEPS, "Check deps" }, - { ST_TASKGROUP, "Taskgroup" }, + /* Work-distribution */ + { ST_WD_DISTRIBUTE, "Work-distribution: Distribute" }, + { ST_WD_FOR_DYNAMIC_CHUNK, "Work-distribution: Dynamic for chunk" }, + { ST_WD_FOR_DYNAMIC_INIT, "Work-distribution: Dynamic for initialization" }, + { ST_WD_FOR_STATIC, "Work-distribution: Static for chunk" }, + { ST_WD_SECTION, "Work-distribution: Section" }, + { ST_WD_SINGLE, "Work-distribution: Single" }, + /* Task */ + { ST_TASK_ALLOC, "Task: Allocation" }, + { ST_TASK_CHECK_DEPS, "Task: Check deps" }, + { ST_TASK_DUP_ALLOC, "Task: Duplicating" }, + { ST_TASK_RELEASE_DEPS, "Task: Releasing deps" }, + { ST_TASK_RUN, "Task: Running task" }, + { ST_TASK_RUN_IF0, "Task: Running task if0" }, + { ST_TASK_SCHEDULE, "Task: Scheduling" }, + { ST_TASK_TASKGROUP, "Task: Taskgroup" }, + { ST_TASK_TASKWAIT, "Task: Taskwait" }, + { ST_TASK_TASKWAIT_DEPS, "Task: Taskwait deps" }, + { ST_TASK_TASKYIELD, "Task: Taskyield" }, + /* Critical */ + { ST_CRITICAL_ACQ, "Critical: Acquiring" }, + { ST_CRITICAL_REL, "Critical: Releasing" }, + { ST_CRITICAL_SECTION, "Critical: Section" }, + /* Barrier */ + { ST_BARRIER_FORK, "Barrier: Fork" }, + { ST_BARRIER_JOIN, "Barrier: Join" }, + { ST_BARRIER_PLAIN, "Barrier: Plain" }, + { ST_BARRIER_TASK, "Barrier: Task" }, + { ST_BARRIER_SPIN_WAIT, "Barrier: Spin wait" }, + /* Runtime */ + { ST_RT_ATTACHED, "Runtime: Attached" }, + { ST_RT_FORK_CALL, "Runtime: Fork call" }, + { ST_RT_INIT, "Runtime: Initialization" }, + { ST_RT_MICROTASK_INTERNAL, "Runtime: Internal microtask" }, + { ST_RT_MICROTASK_USER, "Runtime: User microtask" }, + { ST_RT_WORKER_LOOP, "Runtime: Worker main loop" }, { -1, NULL }, }; @@ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = { }; static const long prv_flags[CH_MAX] = { - [CH_SUBSYSTEM] = PRV_SKIPDUP, + [CH_SUBSYSTEM] = PRV_EMITDUP, }; static const struct model_pvt_spec pvt_spec = { @@ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = { /* ----------------- tracking ------------------ */ static const int th_track[CH_MAX] = { - [CH_SUBSYSTEM] = TRACK_TH_RUN, + [CH_SUBSYSTEM] = TRACK_TH_ACT, }; static const int cpu_track[CH_MAX] = { @@ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = { .prefix = model_name, .ch_names = chan_name, .ch_stack = chan_stack, + .ch_dup = chan_dup, .pvt = &pvt_spec, .track = th_track, }; @@ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = { .prefix = model_name, .ch_names = chan_name, .ch_stack = chan_stack, + .ch_dup = chan_dup, .pvt = &pvt_spec, .track = cpu_track, }; diff --git a/test/rt/CMakeLists.txt b/test/rt/CMakeLists.txt index 7d11797..d871fa5 100644 --- a/test/rt/CMakeLists.txt +++ b/test/rt/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(nanos6) add_subdirectory(nodes) add_subdirectory(nosv) +add_subdirectory(openmp) diff --git a/test/rt/openmp/CMakeLists.txt b/test/rt/openmp/CMakeLists.txt new file mode 100644 index 0000000..1103f8b --- /dev/null +++ b/test/rt/openmp/CMakeLists.txt @@ -0,0 +1,50 @@ +# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC) +# SPDX-License-Identifier: GPL-3.0-or-later + +check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND) +check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND) +cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH) + +if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND) + if(ENABLE_ALL_TESTS) + message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests") + else() + message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests") + endif() + return() +endif() + +function(openmp_rt_test) + ovni_test(${ARGN}) + target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv" + "-no-pedantic") + target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv") + target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m") + set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY + ENVIRONMENT "OMP_OVNI=1") + set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY + ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni") +endfunction() + +openmp_rt_test(barrier-explicit.c) +openmp_rt_test(critical.c) +openmp_rt_test(if0-nested-task.c) +openmp_rt_test(if0.c) +openmp_rt_test(multi-parallels.c) +openmp_rt_test(parallel-for.c) +openmp_rt_test(parallel-loop.c) +openmp_rt_test(parallel-nested.c) +openmp_rt_test(parallel-task.c) +openmp_rt_test(sections.c) +openmp_rt_test(simple-task.c) +openmp_rt_test(task.c) +openmp_rt_test(taskloop.c) +openmp_rt_test(taskwait.c) +openmp_rt_test(team-distribute.c) +openmp_rt_test(worksharing-and-tasks.c) +openmp_rt_test(worksharing-mix.c) +openmp_rt_test(worksharing-task.c) +openmp_rt_test(worksharing.c) +openmp_rt_test(worksharing01.c) +openmp_rt_test(worksharing02.c) +openmp_rt_test(worksharing03.c) diff --git a/test/rt/openmp/barrier-explicit.c b/test/rt/openmp/barrier-explicit.c new file mode 100644 index 0000000..d403725 --- /dev/null +++ b/test/rt/openmp/barrier-explicit.c @@ -0,0 +1,47 @@ +#include +#include +#include "compat.h" + +#define N 100 + +static void +dummy_work(double *x, int i) +{ + sleep_us(i); + x[i] += sqrt((double) i); +} + +int main(void) +{ + double x[N] = { 0 }; + #pragma omp parallel + { + #pragma omp single + { + for (int i = 0; i < N; i++) { + #pragma omp task shared(x) + dummy_work(x, i); + } + } + + sleep_us(200); + #pragma omp barrier + sleep_us(1000); + #pragma omp barrier + + #pragma omp single + { + for (int i = 0; i < N; i++) { + #pragma omp task shared(x) + dummy_work(x, i); + } + } + } + + double sum = 0.0; + for (int i = 0; i < N; i++) + sum += x[i]; + + printf("sum = %e\n", sum); + return 0; +} diff --git a/test/rt/openmp/critical.c b/test/rt/openmp/critical.c new file mode 100644 index 0000000..9a69370 --- /dev/null +++ b/test/rt/openmp/critical.c @@ -0,0 +1,16 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + sleep_us(1000); + + #pragma omp critical + sleep_us(200); + + sleep_us(1000); + } + + return 0; +} diff --git a/test/rt/openmp/if0-nested-task.c b/test/rt/openmp/if0-nested-task.c new file mode 100644 index 0000000..d09da74 --- /dev/null +++ b/test/rt/openmp/if0-nested-task.c @@ -0,0 +1,20 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + #pragma omp single + { + #pragma omp task if(0) + { + #pragma omp task + { + sleep_us(1000); + } + #pragma omp taskwait + } + } + + return 0; +} + diff --git a/test/rt/openmp/if0.c b/test/rt/openmp/if0.c new file mode 100644 index 0000000..20b9c6b --- /dev/null +++ b/test/rt/openmp/if0.c @@ -0,0 +1,17 @@ +#include +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + #pragma omp single + { + #pragma omp task if(0) + { + sleep_us(1000); + } + } + + return 0; +} + diff --git a/test/rt/openmp/multi-parallels.c b/test/rt/openmp/multi-parallels.c new file mode 100644 index 0000000..45d9561 --- /dev/null +++ b/test/rt/openmp/multi-parallels.c @@ -0,0 +1,13 @@ +#include "compat.h" + +int main(void) +{ + for (int i = 0; i < 10; i++) { + #pragma omp parallel + { + sleep_us(1000); + } + } + + return 0; +} diff --git a/test/rt/openmp/parallel-for.c b/test/rt/openmp/parallel-for.c new file mode 100644 index 0000000..c2d74a8 --- /dev/null +++ b/test/rt/openmp/parallel-for.c @@ -0,0 +1,29 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + #pragma omp for + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) { + sleep_us(i); + } + + #pragma omp for + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) { + sleep_us(i); + } + } + + return 0; +} diff --git a/test/rt/openmp/parallel-loop.c b/test/rt/openmp/parallel-loop.c new file mode 100644 index 0000000..51d85f4 --- /dev/null +++ b/test/rt/openmp/parallel-loop.c @@ -0,0 +1,14 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + #pragma omp loop + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + } + + return 0; +} diff --git a/test/rt/openmp/parallel-nested.c b/test/rt/openmp/parallel-nested.c new file mode 100644 index 0000000..ca4f144 --- /dev/null +++ b/test/rt/openmp/parallel-nested.c @@ -0,0 +1,22 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) { + sleep_us(i); + } + + #pragma omp parallel + { + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) { + sleep_us(i); + } + } + } + + return 0; +} diff --git a/test/rt/openmp/parallel-task.c b/test/rt/openmp/parallel-task.c new file mode 100644 index 0000000..7d8b84a --- /dev/null +++ b/test/rt/openmp/parallel-task.c @@ -0,0 +1,26 @@ +#include "compat.h" + +static void foo(void) +{ + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) + sleep_us(i); + + #pragma omp single + for (int i = 0; i < 100; i++) + { + #pragma omp task + sleep_us(10); + } +} + +int main(void) +{ + #pragma omp parallel + { + foo(); + foo(); + } + + return 0; +} diff --git a/test/rt/openmp/sections.c b/test/rt/openmp/sections.c new file mode 100644 index 0000000..ed39a49 --- /dev/null +++ b/test/rt/openmp/sections.c @@ -0,0 +1,51 @@ +#include +#include "compat.h" + +int main(void) +{ + #pragma omp parallel sections + { + #pragma omp section + { sleep_us(1001); printf("1001\n"); } + #pragma omp section + { sleep_us(1002); printf("1002\n"); } + #pragma omp section + { sleep_us(1003); printf("1003\n"); } + #pragma omp section + { sleep_us(1004); printf("1004\n"); } + #pragma omp section + sleep_us(1005); + #pragma omp section + sleep_us(1006); + #pragma omp section + sleep_us(1007); + #pragma omp section + sleep_us(1008); + #pragma omp section + sleep_us(1009); + #pragma omp section + sleep_us(1010); + #pragma omp section + sleep_us(1011); + #pragma omp section + sleep_us(1012); + #pragma omp section + sleep_us(1013); + #pragma omp section + sleep_us(1014); + #pragma omp section + sleep_us(1015); + #pragma omp section + sleep_us(1016); + #pragma omp section + sleep_us(1017); + #pragma omp section + sleep_us(1018); + #pragma omp section + sleep_us(1019); + #pragma omp section + sleep_us(1020); + } + + return 0; +} diff --git a/test/rt/openmp/simple-task.c b/test/rt/openmp/simple-task.c new file mode 100644 index 0000000..0b8a618 --- /dev/null +++ b/test/rt/openmp/simple-task.c @@ -0,0 +1,23 @@ +#include "compat.h" + +int main(void) +{ + int a; + int *p = &a; + + #pragma omp parallel + #pragma omp single + { + #pragma omp task depend(out : p[0]) + { + sleep_us(1000); + } + for (int i = 0; i < 10000; i++) + { + #pragma omp task depend(in : p[0]) + sleep_us(1); + } + } + + return 0; +} diff --git a/test/rt/openmp/task.c b/test/rt/openmp/task.c new file mode 100644 index 0000000..7d82c08 --- /dev/null +++ b/test/rt/openmp/task.c @@ -0,0 +1,19 @@ +#include +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + for (int i = 0; i < 10; i++) { + #pragma omp task + { + printf("%d\n", i); + sleep_us(100); + } + } + #pragma omp barrier + } + + return 0; +} diff --git a/test/rt/openmp/taskloop.c b/test/rt/openmp/taskloop.c new file mode 100644 index 0000000..8a10d6e --- /dev/null +++ b/test/rt/openmp/taskloop.c @@ -0,0 +1,17 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + #pragma omp single + { + #pragma omp taskloop + for (int i = 0; i < 10000; i++) + { + #pragma omp task + sleep_us(1); + } + } + + return 0; +} diff --git a/test/rt/openmp/taskwait.c b/test/rt/openmp/taskwait.c new file mode 100644 index 0000000..dbba63c --- /dev/null +++ b/test/rt/openmp/taskwait.c @@ -0,0 +1,42 @@ +#include "compat.h" +#include + +int main(void) +{ + #pragma omp parallel + #pragma omp single + { + #pragma omp task label("A") + { + sleep_us(5000); + printf("A\n"); + } + + #pragma omp task label("B") + { + #pragma omp task label("B1") + { + sleep_us(2000); + printf("B1\n"); + } + + /* Shouldn't wait for task A */ + #pragma omp taskwait + + #pragma omp task + { + sleep_us(1000); + printf("B2\n"); + } + } + + #pragma omp task label("C") + { + printf("C\n"); + } + } + + /* Expected output C B1 B2 A */ + + return 0; +} diff --git a/test/rt/openmp/team-distribute.c b/test/rt/openmp/team-distribute.c new file mode 100644 index 0000000..daf5051 --- /dev/null +++ b/test/rt/openmp/team-distribute.c @@ -0,0 +1,14 @@ +#include +#include "compat.h" + +int main(void) +{ + #pragma omp teams num_teams(2) + { + #pragma omp distribute parallel for + for (volatile int i = 0; i < 1000; i++) + sleep_us(100 + i); + } + + return 0; +} diff --git a/test/rt/openmp/worksharing-and-tasks.c b/test/rt/openmp/worksharing-and-tasks.c new file mode 100644 index 0000000..7d16634 --- /dev/null +++ b/test/rt/openmp/worksharing-and-tasks.c @@ -0,0 +1,34 @@ +#include +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + //#pragma omp single nowait + for (int i = 0; i < 100; i++) { + #pragma omp task + sleep_us(10); + } + + /* Wait a bit for task allocation */ + sleep_us(1000); + + /* Occupy 4 CPUs with sections */ + #pragma omp sections nowait + { + #pragma omp section + { sleep_us(1001); printf("1001\n"); } + #pragma omp section + { sleep_us(1002); printf("1002\n"); } + #pragma omp section + { sleep_us(1003); printf("1003\n"); } + #pragma omp section + { sleep_us(1004); printf("1004\n"); } + } + + #pragma omp taskwait + } + + return 0; +} diff --git a/test/rt/openmp/worksharing-mix.c b/test/rt/openmp/worksharing-mix.c new file mode 100644 index 0000000..86c4e9c --- /dev/null +++ b/test/rt/openmp/worksharing-mix.c @@ -0,0 +1,66 @@ +#include +#include +#include "compat.h" + +/* Test several work-distribution and task constructs, so we can generate a + * trace that includes most of the states. */ + +int main(void) +{ + #pragma omp parallel + { + #pragma omp for + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + + #pragma omp sections + { + #pragma omp section + { sleep_us(101); printf("101\n"); } + #pragma omp section + { sleep_us(102); printf("102\n"); } + #pragma omp section + { sleep_us(103); printf("103\n"); } + #pragma omp section + { sleep_us(104); printf("104\n"); } + } + + #pragma omp for + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + + #pragma omp single + for (int i = 0; i < 100; i++) + { + #pragma omp task + sleep_us(10); + } + } + + #pragma omp parallel + { + #pragma omp critical + sleep_us(20); + + #pragma omp barrier + + #pragma omp for + for (int i = 0; i < 100; i++) { + sleep_us(1); + } + #pragma omp for schedule(dynamic, 1) + for (int i = 0; i < 100; i++) { + sleep_us(i); + } + } + + // FIXME: Crashes OpenMP-V runtime + //#pragma omp distribute parallel for + //for (int i = 0; i < 1000; i++) { + // sleep_us(1); + //} + + return 0; +} diff --git a/test/rt/openmp/worksharing-task.c b/test/rt/openmp/worksharing-task.c new file mode 100644 index 0000000..1e10834 --- /dev/null +++ b/test/rt/openmp/worksharing-task.c @@ -0,0 +1,25 @@ +#include +#include "compat.h" + +static void foo(void) +{ + #pragma omp for + for (int i = 0; i < 100; ++i) + { + #pragma omp task + { + sleep_us(1); + } + } +} + +int main(void) +{ + #pragma omp parallel + { + foo(); + foo(); + } + + return 0; +} diff --git a/test/rt/openmp/worksharing.c b/test/rt/openmp/worksharing.c new file mode 100644 index 0000000..7a740a4 --- /dev/null +++ b/test/rt/openmp/worksharing.c @@ -0,0 +1,16 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + { + #pragma omp for schedule(dynamic) ordered label("omp for dynamic") + for (int i = 0; i < 100; i++) + sleep_us(100); + + #pragma omp single label("single") + sleep_us(1000); + } + + return 0; +} diff --git a/test/rt/openmp/worksharing01.c b/test/rt/openmp/worksharing01.c new file mode 100644 index 0000000..d214fda --- /dev/null +++ b/test/rt/openmp/worksharing01.c @@ -0,0 +1,11 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + #pragma omp for schedule(static, 30) + for (int i = 0; i < 100; i++) + sleep_us(10); + + return 0; +} diff --git a/test/rt/openmp/worksharing02.c b/test/rt/openmp/worksharing02.c new file mode 100644 index 0000000..f32c67e --- /dev/null +++ b/test/rt/openmp/worksharing02.c @@ -0,0 +1,12 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp target + #pragma omp teams num_teams(1) + #pragma omp distribute dist_schedule(static, 30) + for (int i = 0; i < 100; i++) + sleep_us(10); + + return 0; +} diff --git a/test/rt/openmp/worksharing03.c b/test/rt/openmp/worksharing03.c new file mode 100644 index 0000000..a9fa983 --- /dev/null +++ b/test/rt/openmp/worksharing03.c @@ -0,0 +1,11 @@ +#include "compat.h" + +int main(void) +{ + #pragma omp parallel + #pragma omp for schedule(dynamic) + for (int i = 0; i < 100; i++) + sleep_us(10); + + return 0; +}