Update OpenMP emulation model
Add more tests, subsystem states and documentation.
parent bf2b3b73a0
commit 55318d9da7
				| @ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | |||||||
| 
 | 
 | ||||||
| ## [Unreleased] | ## [Unreleased] | ||||||
| 
 | 
 | ||||||
|  | ### Added | ||||||
|  | 
 | ||||||
|  | - Add OpenMP model (`P`) at version 1.1.0 (currently it only supports subsystems | ||||||
|  |   and only works with the OpenMP-V runtime, on top of nOS-V). | ||||||
|  | 
 | ||||||
| ### Changed | ### Changed | ||||||
| 
 | 
 | ||||||
| - Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}. | - Add support for `nosv_attach` and `nosv_detach` events VA{aAeE}. | ||||||
|  | |||||||
| @ -16,6 +16,8 @@ window_height 150 | |||||||
| window_comm_lines_enabled true | window_comm_lines_enabled true | ||||||
| window_flags_enabled false | window_flags_enabled false | ||||||
| window_noncolor_mode true | window_noncolor_mode true | ||||||
|  | window_custom_color_enabled true | ||||||
|  | window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126} | ||||||
| window_logical_filtered true | window_logical_filtered true | ||||||
| window_physical_filtered false | window_physical_filtered false | ||||||
| window_comm_fromto true | window_comm_fromto true | ||||||
|  | |||||||
| @ -4,9 +4,9 @@ ConfigFile.NumWindows: 1 | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ################################################################################ | ################################################################################ | ||||||
| < NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread > | < NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the ACTIVE thread > | ||||||
| ################################################################################ | ################################################################################ | ||||||
| window_name Thread: OpenMP subsystem of the RUNNING thread | window_name Thread: OpenMP subsystem of the ACTIVE thread | ||||||
| window_type single | window_type single | ||||||
| window_id 1 | window_id 1 | ||||||
| window_position_x 0 | window_position_x 0 | ||||||
| @ -16,6 +16,8 @@ window_height 150 | |||||||
| window_comm_lines_enabled true | window_comm_lines_enabled true | ||||||
| window_flags_enabled false | window_flags_enabled false | ||||||
| window_noncolor_mode true | window_noncolor_mode true | ||||||
|  | window_custom_color_enabled true | ||||||
|  | window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126} | ||||||
| window_logical_filtered true | window_logical_filtered true | ||||||
| window_physical_filtered false | window_physical_filtered false | ||||||
| window_comm_fromto true | window_comm_fromto true | ||||||
| @ -38,5 +40,5 @@ window_labels_to_draw 1 | |||||||
| window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } | ||||||
| window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } | ||||||
| window_filter_module evt_type 1 50 | window_filter_module evt_type 1 50 | ||||||
| window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread" | window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the ACTIVE thread" | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -433,86 +433,130 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`: | |||||||
| 
 | 
 | ||||||
| List of events for the model *openmp* with identifier **`P`** at version `1.1.0`: | List of events for the model *openmp* with identifier **`P`** at version `1.1.0`: | ||||||
| <dl> | <dl> | ||||||
| <dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt> |  | ||||||
| <dd>enters the attached state</dd> |  | ||||||
| <dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt> |  | ||||||
| <dd>leaves the attached state</dd> |  | ||||||
| <dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt> |  | ||||||
| <dd>enters a join barrier</dd> |  | ||||||
| <dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt> |  | ||||||
| <dd>leaves a join barrier</dd> |  | ||||||
| <dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt> | <dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt> | ||||||
| <dd>enters a barrier</dd> | <dd>begins plain barrier</dd> | ||||||
| <dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt> | <dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt> | ||||||
| <dd>leaves a barrier</dd> | <dd>ceases plain barrier</dd> | ||||||
|  | <dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt> | ||||||
|  | <dd>begins join barrier</dd> | ||||||
|  | <dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt> | ||||||
|  | <dd>ceases join barrier</dd> | ||||||
|  | <dt><a id="PBf" href="#PBf"><pre>PBf</pre></a></dt> | ||||||
|  | <dd>begins fork barrier</dd> | ||||||
|  | <dt><a id="PBF" href="#PBF"><pre>PBF</pre></a></dt> | ||||||
|  | <dd>ceases fork barrier</dd> | ||||||
| <dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt> | <dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt> | ||||||
| <dd>enters a tasking barrier</dd> | <dd>begins tasking barrier</dd> | ||||||
| <dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt> | <dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt> | ||||||
| <dd>leaves a tasking barrier</dd> | <dd>ceases tasking barrier</dd> | ||||||
| <dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt> | <dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt> | ||||||
| <dd>enters a spin wait</dd> | <dd>begins spin wait</dd> | ||||||
| <dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt> | <dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt> | ||||||
| <dd>leaves a spin wait</dd> | <dd>ceases spin wait</dd> | ||||||
| <dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt> | <dt><a id="PIa" href="#PIa"><pre>PIa</pre></a></dt> | ||||||
| <dd>begins static for</dd> | <dd>begins critical acquiring</dd> | ||||||
| <dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt> | <dt><a id="PIA" href="#PIA"><pre>PIA</pre></a></dt> | ||||||
| <dd>ceases static for</dd> | <dd>ceases critical acquiring</dd> | ||||||
|  | <dt><a id="PIr" href="#PIr"><pre>PIr</pre></a></dt> | ||||||
|  | <dd>begins critical releasing</dd> | ||||||
|  | <dt><a id="PIR" href="#PIR"><pre>PIR</pre></a></dt> | ||||||
|  | <dd>ceases critical releasing</dd> | ||||||
|  | <dt><a id="PI[" href="#PI["><pre>PI[</pre></a></dt> | ||||||
|  | <dd>begins critical section</dd> | ||||||
|  | <dt><a id="PI]" href="#PI]"><pre>PI]</pre></a></dt> | ||||||
|  | <dd>ceases critical section</dd> | ||||||
| <dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt> | <dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt> | ||||||
| <dd>begins dynamic for init</dd> | <dd>begins distribute</dd> | ||||||
| <dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt> | <dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt> | ||||||
|  | <dd>ceases distribute</dd> | ||||||
|  | <dt><a id="PWy" href="#PWy"><pre>PWy</pre></a></dt> | ||||||
|  | <dd>begins dynamic for init</dd> | ||||||
|  | <dt><a id="PWY" href="#PWY"><pre>PWY</pre></a></dt> | ||||||
| <dd>ceases dynamic for init</dd> | <dd>ceases dynamic for init</dd> | ||||||
| <dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt> | <dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt> | ||||||
| <dd>begins dynamic for chunk</dd> | <dd>begins dynamic for chunk</dd> | ||||||
| <dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt> | <dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt> | ||||||
| <dd>ceases dynamic for chunk</dd> | <dd>ceases dynamic for chunk</dd> | ||||||
|  | <dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt> | ||||||
|  | <dd>begins static for</dd> | ||||||
|  | <dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt> | ||||||
|  | <dd>ceases static for</dd> | ||||||
|  | <dt><a id="PWe" href="#PWe"><pre>PWe</pre></a></dt> | ||||||
|  | <dd>begins section</dd> | ||||||
|  | <dt><a id="PWE" href="#PWE"><pre>PWE</pre></a></dt> | ||||||
|  | <dd>ceases section</dd> | ||||||
| <dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt> | <dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt> | ||||||
| <dd>begins single</dd> | <dd>begins single</dd> | ||||||
| <dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt> | <dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt> | ||||||
| <dd>ceases single</dd> | <dd>ceases single</dd> | ||||||
| <dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt> |  | ||||||
| <dd>begins releasing task dependencies</dd> |  | ||||||
| <dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt> |  | ||||||
| <dd>ceases releasing task dependencies</dd> |  | ||||||
| <dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt> |  | ||||||
| <dd>begins waiting for taskwait dependencies</dd> |  | ||||||
| <dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt> |  | ||||||
| <dd>ceases waiting for taskwait dependencies</dd> |  | ||||||
| <dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt> |  | ||||||
| <dd>begins invoking a task</dd> |  | ||||||
| <dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt> |  | ||||||
| <dd>ceases invoking a task</dd> |  | ||||||
| <dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt> |  | ||||||
| <dd>begins invoking an if0 task</dd> |  | ||||||
| <dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt> |  | ||||||
| <dd>ceases invoking an if0 task</dd> |  | ||||||
| <dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt> | <dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt> | ||||||
| <dd>begins task allocation</dd> | <dd>begins task allocation</dd> | ||||||
| <dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt> | <dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt> | ||||||
| <dd>ceases task allocation</dd> | <dd>ceases task allocation</dd> | ||||||
| <dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt> |  | ||||||
| <dd>begins scheduling a task</dd> |  | ||||||
| <dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt> |  | ||||||
| <dd>ceases scheduling a task</dd> |  | ||||||
| <dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt> |  | ||||||
| <dd>enters a taskwait</dd> |  | ||||||
| <dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt> |  | ||||||
| <dd>leaves a taskwait</dd> |  | ||||||
| <dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt> |  | ||||||
| <dd>enters a taskyield</dd> |  | ||||||
| <dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt> |  | ||||||
| <dd>leaves a taskyield</dd> |  | ||||||
| <dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt> |  | ||||||
| <dd>begins duplicating a task</dd> |  | ||||||
| <dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt> |  | ||||||
| <dd>ceases duplicating a task</dd> |  | ||||||
| <dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt> | <dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt> | ||||||
| <dd>begins checking task dependencies</dd> | <dd>begins checking task dependencies</dd> | ||||||
| <dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt> | <dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt> | ||||||
| <dd>ceases checking task dependencies</dd> | <dd>ceases checking task dependencies</dd> | ||||||
|  | <dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt> | ||||||
|  | <dd>begins duplicating a task</dd> | ||||||
|  | <dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt> | ||||||
|  | <dd>ceases duplicating a task</dd> | ||||||
|  | <dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt> | ||||||
|  | <dd>begins releasing task dependencies</dd> | ||||||
|  | <dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt> | ||||||
|  | <dd>ceases releasing task dependencies</dd> | ||||||
|  | <dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt> | ||||||
|  | <dd>begins running a task</dd> | ||||||
|  | <dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt> | ||||||
|  | <dd>ceases running a task</dd> | ||||||
|  | <dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt> | ||||||
|  | <dd>begins running an if0 task</dd> | ||||||
|  | <dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt> | ||||||
|  | <dd>ceases running an if0 task</dd> | ||||||
|  | <dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt> | ||||||
|  | <dd>begins scheduling a task</dd> | ||||||
|  | <dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt> | ||||||
|  | <dd>ceases scheduling a task</dd> | ||||||
| <dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt> | <dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt> | ||||||
| <dd>enters a taskgroup</dd> | <dd>begins a taskgroup</dd> | ||||||
| <dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt> | <dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt> | ||||||
| <dd>leaves a taskgroup</dd> | <dd>ceases a taskgroup</dd> | ||||||
|  | <dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt> | ||||||
|  | <dd>begins a taskwait</dd> | ||||||
|  | <dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt> | ||||||
|  | <dd>ceases a taskwait</dd> | ||||||
|  | <dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt> | ||||||
|  | <dd>begins waiting for taskwait dependencies</dd> | ||||||
|  | <dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt> | ||||||
|  | <dd>ceases waiting for taskwait dependencies</dd> | ||||||
|  | <dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt> | ||||||
|  | <dd>begins a taskyield</dd> | ||||||
|  | <dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt> | ||||||
|  | <dd>ceases a taskyield</dd> | ||||||
|  | <dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt> | ||||||
|  | <dd>enters the attached state</dd> | ||||||
|  | <dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt> | ||||||
|  | <dd>leaves the attached state</dd> | ||||||
|  | <dt><a id="PMi" href="#PMi"><pre>PMi</pre></a></dt> | ||||||
|  | <dd>begins microtask internal</dd> | ||||||
|  | <dt><a id="PMI" href="#PMI"><pre>PMI</pre></a></dt> | ||||||
|  | <dd>ceases microtask internal</dd> | ||||||
|  | <dt><a id="PMu" href="#PMu"><pre>PMu</pre></a></dt> | ||||||
|  | <dd>begins microtask user code</dd> | ||||||
|  | <dt><a id="PMU" href="#PMU"><pre>PMU</pre></a></dt> | ||||||
|  | <dd>ceases microtask user code</dd> | ||||||
|  | <dt><a id="PH[" href="#PH["><pre>PH[</pre></a></dt> | ||||||
|  | <dd>begins worker loop</dd> | ||||||
|  | <dt><a id="PH]" href="#PH]"><pre>PH]</pre></a></dt> | ||||||
|  | <dd>ceases worker loop</dd> | ||||||
|  | <dt><a id="PCf" href="#PCf"><pre>PCf</pre></a></dt> | ||||||
|  | <dd>begins fork call</dd> | ||||||
|  | <dt><a id="PCF" href="#PCF"><pre>PCF</pre></a></dt> | ||||||
|  | <dd>ceases fork call</dd> | ||||||
|  | <dt><a id="PCi" href="#PCi"><pre>PCi</pre></a></dt> | ||||||
|  | <dd>begins initialization</dd> | ||||||
|  | <dt><a id="PCI" href="#PCI"><pre>PCI</pre></a></dt> | ||||||
|  | <dd>ceases initialization</dd> | ||||||
| </dl> | </dl> | ||||||
| 
 | 
 | ||||||
| ## Model tampi | ## Model tampi | ||||||
|  | |||||||
							
								
								
									
										
doc/user/emulation/fig/openmp-subsystem.png: new binary file (PNG, 9.4 KiB), not shown.
| @ -1,164 +1,243 @@ | |||||||
| # OpenMP Model | # OpenMP model | ||||||
| 
 | 
 | ||||||
| The LLVM OpenMP Runtime is an integral component of the LLVM compiler | The [OpenMP programming model](https://www.openmp.org) is a widely used API and | ||||||
| infrastructure that provides support for the OpenMP (Open Multi-Processing) | set of directives for parallel programming, allowing developers to write | ||||||
| programming model. | multi-threaded and multi-process applications more easily. In this document we | ||||||
|  | refer to | ||||||
|  | [version 5.2 of the OpenMP specification](https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf). | ||||||
| 
 | 
 | ||||||
| OpenMP is a widely used API and set of directives for parallel programming, | The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides | ||||||
| allowing developers to write multi-threaded and multi-process applications more | an implementation of the OpenMP specification as a component of the LLVM | ||||||
| easily. | compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top | ||||||
|  | of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the | ||||||
|  | [OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**. | ||||||
| 
 | 
 | ||||||
| This documentation is about an OpenMP runtime built on top of [nOS-V][nosv], | We have added instrumentation events to OpenMP-V designed to be enabled alongside | ||||||
| leveraging its thread management capabilities while retaining the fundamental | the [nOS-V instrumentation](nosv.md). This document describes all the | ||||||
| characteristics of the original runtime. | instrumentation features included in our modified OpenMP-V runtime to identify | ||||||
|  | what is happening. This data is useful for both users and developers of the | ||||||
|  | OpenMP runtime to analyze issues and undesired behaviors. | ||||||
| 
 | 
 | ||||||
| While the modifications introduced to the runtime may appear to be minor, it's | !!! Note | ||||||
| important to note that this enhanced version is not API compatible with the |  | ||||||
| original runtime. As a result, it is mandatory to use the clang built in the same |  | ||||||
| [LLVM Project][llvm]. |  | ||||||
| 
 | 
 | ||||||
| This document describes all the instrumentation features included in the runtime |     Instrumenting the original OpenMP runtime from the LLVM project is planned | ||||||
| by both nOS-V and OpenMP to monitor task execution and the execution flow within |     but is not yet possible. For now you must use the modified OpenMP-V runtime | ||||||
| the runtime library to identify what is happening. This data is useful for both |     with nOS-V. | ||||||
| users and developers of the OpenMP runtime to analyze issues and undesired |  | ||||||
| behaviors. |  | ||||||
| 
 | 
 | ||||||
| [llvm]: https://pm.bsc.es/llvm-ompss | ## Enable the instrumentation | ||||||
| [nosv]: https://gitlab.bsc.es/nos-v/nos-v |  | ||||||
| 
 | 
 | ||||||
| ## How to Generate Execution Traces | To generate runtime traces, you will have to: | ||||||
| 
 | 
 | ||||||
| In order to build the OpenMP runtime nOS-V must be provided by using | 1. **Build nOS-V with ovni support:** Refer to the | ||||||
| `PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a |   [nOS-V | ||||||
| runtime without instrumentation. However, the user may be able to generate |   documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md). | ||||||
| execution traces by enabling nOS-V instrumentation through |   Typically you should use the `--with-ovni` option at configure time to specify | ||||||
| `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a |   where ovni is installed. | ||||||
| nOS-V installation built with ovni. | 2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH` | ||||||
|  |   environment variable to specify the nOS-V and ovni installations | ||||||
|  |   when configuring CMake. | ||||||
|  | 3. **Enable the instrumentation in nOS-V at runtime:** Refer to the | ||||||
|  |   [nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md) | ||||||
|  |   to find out how to enable the tracing at runtime. Typically you can just set  | ||||||
|  |   `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. | ||||||
|  | 4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment | ||||||
|  |   variable `OMP_OVNI=1`. | ||||||
| 
 | 
 | ||||||
| Building OpenMP with instrumentation requires to pass ovni pkg-config path to | Currently there is only support for the subsystem view, which is documented | ||||||
| `PKG_CONFIG_PATH` with a nosv installation compiled with ovni too. The reason is | below. The view is complemented by the information in the [nOS-V views](nosv.md), | ||||||
| because OpenMP is dependent of nOS-V to generate complete execution traces. | as OpenMP-V uses nOS-V tasks to run the workers. | ||||||
| 
 | 
 | ||||||
| By default, OpenMP will not instrument anything. To enable instrumentation the | ## Subsystem view | ||||||
| user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. |  | ||||||
| 
 | 
 | ||||||
| The following sections will describe the OpenMP execution trace views and what |  | ||||||
| information is shown there. |  | ||||||
| 
 |  | ||||||
| ## nOS-V Task Type |  | ||||||
| 
 |  | ||||||
| As said in the previous sections. This OpenMP runtime is built on top of nOS-V. |  | ||||||
| So the user can explore what does the execution do there. Here we only describe |  | ||||||
| the task type view. For other views please take a look at the nOS-V chapter. |  | ||||||
| 
 |  | ||||||
| In OpenMP, every thread that is launched (main thread included) is shown in a task |  | ||||||
| type with label "openmp". In a task application, every task call will be seen with |  | ||||||
| a task type with label "file:line:col" format referring to the pragma location. This |  | ||||||
| can be changed by using the clause label(string-literal). |  | ||||||
| 
 |  | ||||||
| OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for |  | ||||||
| more information. Nevertheless, the OpenMP task view shows it. |  | ||||||
| 
 |  | ||||||
| ## OpenMP Subsystem |  | ||||||
| 
 | 
 | ||||||
| This view illustrates the activities of each thread with different states: | This view illustrates the activities of each thread with different states: | ||||||
| 
 | 
 | ||||||
| - **Attached**: The thread is attached. | - **Work-distribution subsystem**: Related to work-distribution constructs, | ||||||
|  |     in [Chapter 11][workdis]. | ||||||
| 
 | 
 | ||||||
| - **Join barrier**: The thread is in the implicit barrier of the parallel region. |     - **Distribute**: Running a *Distribute* region. | ||||||
| 
 | 
 | ||||||
| - **Tasking barrier**: The thread is in the additional tasking barrier trying to |     - **Dynamic for chunk**: Running a chunk of a dynamic *for*, which often | ||||||
|   execute tasks. This event happens if executed with KMP_TASKING=1. |       involves running more than one iteration of the loop. See the | ||||||
|  |       [limitations](#dynamic-for) below. | ||||||
| 
 | 
 | ||||||
| - **Spin wait**: The thread spin waits for a condition. Usually this event happens |     - **Dynamic for initialization**: Preparing a dynamic *for*. | ||||||
|   in a barrier while waiting for the other threads to reach the barrier. The thread |  | ||||||
|   also tries to execute tasks. |  | ||||||
| 
 | 
 | ||||||
| - **For static**: Executing a for static. The length of the event represents all the |     - **Static for chunk**: Executing the assigned iterations of a static | ||||||
|   chunks of iterations executed by the thread. See "Limitations" section. |       *for*. | ||||||
| 
 | 
 | ||||||
| - **For dynamic init**: Running the initialization of an OpenMP for dynamic. |     - **Single**: Running a *Single* region. All threads of the parallel region | ||||||
|  |       participate. | ||||||
| 
 | 
 | ||||||
| - **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To |     - **Section**: Running a *Section* region. All threads of the parallel region | ||||||
|   clarify. If a thread executes two chunks of iterations, let's say from 1 to 4 and |       participate. | ||||||
|   from 8 to 12, two different events will be shown. See "Limitations" section. |  | ||||||
| 
 | 
 | ||||||
| - **Single**: Running a Single region. All threads of the parallel region will emit | - **Task subsystem**: Related to tasking constructs, in [Chapter 12][tasking]. | ||||||
|   the event. |  | ||||||
| 
 | 
 | ||||||
| - **Release deps**: When finishing a task, trying to release dependencies. This |     - **Allocation**: Allocating the task descriptor. | ||||||
|   event happens although the task has no dependencies. |  | ||||||
| 
 | 
 | ||||||
| - **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled. |     - **Check deps**: Checking if the task has pending dependencies to be | ||||||
|   This appears typically in a task if0 with dependencies or a taskwait with deps. |       fulfilled. When all dependencies are fulfilled the task will be scheduled. | ||||||
| 
 | 
 | ||||||
| - **Invoke task**: Executing a task. |     - **Duplicating**: Duplicating the task descriptor in a taskloop. | ||||||
| 
 | 
 | ||||||
| - **Invoke task if0**: Executing a task if0. |     - **Releasing deps**: Releasing dependencies at the end of a task. This | ||||||
|  |       state is always present even if the task has no dependencies. | ||||||
| 
 | 
 | ||||||
| - **Task alloc**: Allocating the task descriptor. |     - **Running task**: Executing a task. | ||||||
| 
 | 
 | ||||||
| - **Task schedule**: Adding the task to the scheduler. |     - **Running task if0**: Executing a task if0. | ||||||
| 
 | 
 | ||||||
| - **Taskwait**: Running a taskwait. |     - **Scheduling**: Adding the task to the scheduler for execution. | ||||||
| 
 | 
 | ||||||
| - **Taskyield**: Running a taskyield. |     - **Taskgroup**: Waiting in a *taskgroup* construct. | ||||||
| 
 | 
 | ||||||
| - **Task dup alloc**: Duplicating the task descriptor in a taskloop. |     - **Taskwait**: Waiting in a *taskwait* construct. | ||||||
| 
 | 
 | ||||||
| - **Check deps**: Checking if the task has pending dependencies to be fulfilled. This |     - **Taskwait deps**: Trying to execute tasks until dependencies have been | ||||||
|   means that if all dependencies are fulfilled the task will be scheduled. |       fulfilled. This appears typically in a task if0 with dependencies or a | ||||||
|  |       taskwait with deps. | ||||||
|      |      | ||||||
| - **Taskgroup**: Running a taskgroup. |     - **Taskyield**: Performing a *taskyield* construct. | ||||||
|  | 
 | ||||||
|  | - **Critical subsystem**: Related to the *critical* construct, in [Section 15.2][critical]. | ||||||
|  | 
 | ||||||
|  |     - **Acquiring**: Waiting to acquire a *Critical* section. | ||||||
|  | 
 | ||||||
|  |     - **Section**: Running the *Critical* section. | ||||||
|  | 
 | ||||||
|  |     - **Releasing**: Waiting to release a *Critical* section. | ||||||
|  | 
 | ||||||
|  | - **Barrier subsystem**: Related to barriers, in [Section 15.3][barrier]. | ||||||
|  |     **All barriers can try to execute tasks**. | ||||||
|  | 
 | ||||||
|  |     - **Barrier: Fork**: Workers wait for a release signal from the master thread to | ||||||
|  |       continue. The master can continue as soon as it signals the workers. It is | ||||||
|  |       done at the beginning of a fork-join region. | ||||||
|  | 
 | ||||||
|  |     - **Barrier: Join**: The master thread waits until all workers finish their work. | ||||||
|  |       Workers can continue as soon as they signal the master. It is done at the | ||||||
|  |       end of a fork-join region. | ||||||
|  |    | ||||||
|  |     - **Barrier: Plain**: Performing a plain barrier, which waits for a release | ||||||
|  |       signal from the master thread to continue. It is done at the beginning of | ||||||
|  |       a fork-join region, in the `__kmp_join_barrier()` function. | ||||||
|  | 
 | ||||||
|  |     - **Barrier: Task**: Blocked in an additional tasking barrier *until all previous | ||||||
|  |       tasks have been executed*. Only happens when executed with `KMP_TASKING=1`. | ||||||
|  | 
 | ||||||
|  | - **Runtime subsystem**: Internal operations of the runtime. | ||||||
|  | 
 | ||||||
|  |     - **Attached**: Present after the call to `nosv_attach()` and before | ||||||
|  |       `nosv_detach()`. This state is a hack. | ||||||
|  | 
 | ||||||
|  |     - **Fork call**: Preparing a parallel section using the fork-join model. | ||||||
|  |       Only called from the master thread. | ||||||
|  | 
 | ||||||
|  |     - **Init**: Initializing the OpenMP-V runtime. | ||||||
|  | 
 | ||||||
|  |     - **Internal microtask**: Running an internal OpenMP-V function as a microtask. | ||||||
|  | 
 | ||||||
|  |     - **User microtask**: Running user code as a microtask in a worker thread. | ||||||
|  | 
 | ||||||
|  |     - **Worker main loop**: Running the main loop, where the workers run the | ||||||
|  |       fork barrier, run a microtask and perform a join barrier until there is no | ||||||
|  |       more work. | ||||||
|  | 
 | ||||||
|  | !!! Note | ||||||
|  | 
 | ||||||
|  |     The generated HTML version of the OpenMP 5.2 specification has some parts | ||||||
|  |     missing, so we link directly to the PDF file which may not work in some | ||||||
|  |     browsers. | ||||||
|  | 
 | ||||||
|  | [workdis]:  https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.11 | ||||||
|  | [tasking]:  https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#chapter.12 | ||||||
|  | [critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2 | ||||||
|  | [barrier]:  https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3 | ||||||
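
As a rough illustration of how these states map to source code, the small program below (hypothetical user code, not part of the runtime or its tests) exercises several of the constructs listed above. When traced with the instrumentation enabled, it should produce, among others, the *Single*, *Critical* and *Task* states; the exact states and their ordering depend on the schedule, the number of threads and the runtime itself.

```c
#include <stdio.h>
#include <omp.h>

int main(void)
{
    int counter = 0;

    #pragma omp parallel
    {
        /* Work-distribution: Single (followed by its implicit barrier) */
        #pragma omp single
        printf("single runs on thread %d\n", omp_get_thread_num());

        /* Critical: Acquiring, Section and Releasing */
        #pragma omp critical
        counter++;

        /* Task: Allocation, Scheduling and Running task */
        #pragma omp task
        printf("task runs on thread %d\n", omp_get_thread_num());

        /* Task: Taskwait until the explicit tasks above finish */
        #pragma omp taskwait
    }

    printf("counter = %d\n", counter);
    return 0;
}
```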
| 
 | 
 | ||||||
| ## Limitations | ## Limitations | ||||||
| 
 | 
 | ||||||
| By the way how OpenMP is implemented. There are some instrumentation points that | As the compiler generates the code that performs the calls to the OpenMP-V | ||||||
| violate ovni subsystem rules. This mostly happens because some directives are lowered | runtime, there are some parts of the execution that are complicated to | ||||||
| partially in the transformed user code, so it is not easy to wrap them into a | instrument by just placing a pair of events to delimit a function. | ||||||
| Single-entry single-exit (SESE) region, like we would do with a regular task invocation, |  | ||||||
| for example. |  | ||||||
| 
 | 
 | ||||||
| All problematic directives are described here so the user is able to understand what | For those cases we use an approximation which is documented in the following | ||||||
| is being show in the traces | subsections. | ||||||
| 
 | 
 | ||||||
| - **Task if0**: The lowered user code of a task if0 is: | ### Dynamic for | ||||||
|   ... = __kmpc_omp_task_alloc(...); |  | ||||||
|   __kmpc_omp_taskwait_deps_51(...); // If task has dependencies |  | ||||||
|   __kmpc_omp_task_begin_if0(...); |  | ||||||
|   // Call to the user code |  | ||||||
|   omp_task_entry_(...); |  | ||||||
|   __kmpc_omp_task_complete_if0(...); |  | ||||||
| 
 | 
 | ||||||
|   Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As | The generated code of a *dynamic for* has the following structure: | ||||||
|   this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0` | chunks (groups of iterations) to be executed by the thread, otherwise it returns | ||||||
|   and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent. |  | ||||||
| 
 | 
 | ||||||
| - **For static**: The lowered user code of a for static is: | ```c | ||||||
|   // Parallel code | __kmpc_dispatch_init_4(...); | ||||||
|   __kmpc_for_static_init_4(...); | while (__kmpc_dispatch_next_4(...)) { | ||||||
|   for ( i = ...; i <= ...; ++i ) |     for (i = ...; i <= ...; i++) { | ||||||
|     ;  |         // User code ... | ||||||
|   __kmpc_for_static_fini(...); |     } | ||||||
|  | } | ||||||
|  | ``` | ||||||
| 
 | 
 | ||||||
|   Ideally, the for loop should be called by the runtime to ensure the SESE structure. As | The function `__kmpc_dispatch_next_4()` returns `true` if there are more | ||||||
|   this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4` | chunks (group of iterations) to be executed by the thread, otherwise it returns | ||||||
|   and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent. | `false`. | ||||||
| 
 | 
 | ||||||
| - **For dynamic**: The lowered user code of a for dynamic is: | Ideally we want to instrument each chunk with a pair of begin and end events. | ||||||
| 
 | 
 | ||||||
|   __kmpc_dispatch_init_4(...); | The problem with the instrumentation is that there is no easy way of determining | ||||||
|   while ( __kmpc_dispatch_next_4(...)) | if the call to `__kmpc_dispatch_next_4()` is processing the first chunk, just | ||||||
|   { | after `__kmpc_dispatch_init_4()`, or is coming from other chunks due to the | ||||||
|     for ( i = ...; i <= ...; ++i ) | while loop. | ||||||
|       ; |  | ||||||
|   } |  | ||||||
| 
 | 
 | ||||||
|   Ideally, the for loop should be called by the runtime to ensure the SESE structure. As | Therefore, from `__kmpc_dispatch_next_4()` alone, we cannot determine whether we | ||||||
|   this code is generated by the compiler the subsystem view shows: | need to emit only a single "begin a new chunk" event or we need to emit the pair | ||||||
|   1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init** | of events "finish the last chunk" and "begin a new one". | ||||||
|   2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4`. |  | ||||||
|   with the event **For dynamic chunk**. |  | ||||||
|   3. How long it takes to run a loop iteration chunk between the last and the previous |  | ||||||
|   `__kmpc_dispatch_next_4` call with the event **For dynamic chunk**. |  | ||||||
| 
 | 
 | ||||||
|  | So, as a workaround, we emit an event from the end of `__kmpc_dispatch_init_4()` | ||||||
|  | starting a new chunk (which is fake), and then from `__kmpc_dispatch_next_4()` we | ||||||
|  | always emit the "finish the last chunk" and "begin a new one" events (unless | ||||||
|  | there are no more chunks, in which case we don't emit the "begin a new one" | ||||||
|  | event). | ||||||
|  | 
 | ||||||
|  | This will cause a spurious *Work-distribution: Dynamic for chunk* state at the | ||||||
|  | beginning of each dynamic for, which should be very short and is not really a | ||||||
|  | chunk. | ||||||
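
For reference, a loop like the following (an illustrative example, not taken from the runtime sources) is lowered into the dispatch init/next pattern shown above; each group of 4 iterations taken by a thread appears as one *Work-distribution: Dynamic for chunk* state, preceded by the short spurious chunk just described.

```c
#include <stdio.h>
#include <omp.h>

int main(void)
{
    /* schedule(dynamic, 4): threads repeatedly request chunks of 4
     * iterations from the runtime until none are left. */
    #pragma omp parallel for schedule(dynamic, 4)
    for (int i = 0; i < 64; i++)
        printf("i = %d on thread %d\n", i, omp_get_thread_num());

    return 0;
}
```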
|  | 
 | ||||||
|  | ### Static for | ||||||
|  | 
 | ||||||
|  | The generated code of a *static for* has the following structure: | ||||||
|  | 
 | ||||||
|  | ```c | ||||||
|  | __kmpc_for_static_init_4(...); | ||||||
|  | for (i = ...; i <= ...; i++) { | ||||||
|  |     // User code ... | ||||||
|  | } | ||||||
|  | __kmpc_for_static_fini(...); | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | As this code is generated by the compiler we cannot easily add the begin/end | ||||||
|  | pair of events to mark the *Work-distribution: Static for chunk* state. | ||||||
|  | 
 | ||||||
|  | We assume that placing the "begin processing a chunk" event at the end of | ||||||
|  | `__kmpc_for_static_init_4()` and the "end processing the chunk" event at | ||||||
|  | the beginning of `__kmpc_for_static_fini()` is equivalent to adding the | ||||||
|  | events surrounding the for loop. | ||||||
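
As an illustration (example code only, not from the runtime), a statically scheduled loop like the one below is lowered into the init/fini pattern above, so each thread runs its whole block of iterations inside a single *Work-distribution: Static for chunk* state.

```c
#include <stdio.h>
#include <omp.h>

int main(void)
{
    /* With schedule(static) each thread gets one contiguous block of
     * iterations, delimited in the generated code by
     * __kmpc_for_static_init_4() and __kmpc_for_static_fini(). */
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < 16; i++)
        printf("i = %d on thread %d\n", i, omp_get_thread_num());

    return 0;
}
```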
|  | 
 | ||||||
|  | ### Task if0 | ||||||
|  | 
 | ||||||
|  | The generated code of an *if0 task* has the following structure: | ||||||
|  | 
 | ||||||
|  | ```c | ||||||
|  | ... = __kmpc_omp_task_alloc(...); | ||||||
|  | __kmpc_omp_taskwait_deps_51(...); // If task has dependencies | ||||||
|  | __kmpc_omp_task_begin_if0(...); | ||||||
|  | // Call to the user code | ||||||
|  | omp_task_entry_(...); | ||||||
|  | __kmpc_omp_task_complete_if0(...); | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | Instead of injecting the begin and end events in the user code, we | ||||||
|  | approximate it by placing the "begin if0 task" event at the end of the | ||||||
|  | `__kmpc_omp_task_begin_if0` function and the "end if0 task" event at the | ||||||
|  | beginning of `__kmpc_omp_task_complete_if0`. This state will be shown as  | ||||||
|  | *Task: Running task if0*. | ||||||
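
For example, an if0 task like the one below (illustrative code only) is lowered into the sequence above, so its body shows up as a *Task: Running task if0* state on the thread that created it.

```c
#include <stdio.h>
#include <omp.h>

int main(void)
{
    #pragma omp parallel
    #pragma omp single
    {
        int x = 42;

        /* if(0): the creating thread suspends and runs the task body
         * immediately instead of deferring it. */
        #pragma omp task if(0) firstprivate(x)
        printf("if0 task sees x = %d on thread %d\n", x, omp_get_thread_num());
    }

    return 0;
}
```
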
|  | |||||||
							
								
								
									
flake.lock (generated, 8 changed lines)
							| @ -7,11 +7,11 @@ | |||||||
|         ] |         ] | ||||||
|       }, |       }, | ||||||
|       "locked": { |       "locked": { | ||||||
|         "lastModified": 1701968480, |         "lastModified": 1705310446, | ||||||
|         "narHash": "sha256-YoKN8FZllNQfpEpMqGOBv77kp9J0mlVRlhixWbcDqWg=", |         "narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=", | ||||||
|         "ref": "refs/heads/master", |         "ref": "refs/heads/master", | ||||||
|         "rev": "c4d5135fde108401417fdcdf5e1c8d11aeca4f32", |         "rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f", | ||||||
|         "revCount": 931, |         "revCount": 932, | ||||||
|         "type": "git", |         "type": "git", | ||||||
|         "url": "https://git.sr.ht/~rodarima/bscpkgs" |         "url": "https://git.sr.ht/~rodarima/bscpkgs" | ||||||
|       }, |       }, | ||||||
|  | |||||||
							
								
								
									
flake.nix (35 changed lines)
							| @ -7,11 +7,15 @@ | |||||||
| 
 | 
 | ||||||
|   outputs = { self, nixpkgs, bscpkgs }: |   outputs = { self, nixpkgs, bscpkgs }: | ||||||
|   let |   let | ||||||
|  |     # Set to true to replace all libovni in all runtimes with the current | ||||||
|  |     # source. Causes large rebuilds on changes of ovni. | ||||||
|  |     useLocalOvni = false; | ||||||
|  | 
 | ||||||
|     ovniOverlay = final: prev: { |     ovniOverlay = final: prev: { | ||||||
|       nosv = prev.nosv.override { |       nosv = prev.nosv.override { | ||||||
|         useGit = true; |         useGit = true; | ||||||
|         gitBranch = "master"; |         gitBranch = "master"; | ||||||
|         gitCommit = "6a63fd4378ba458243dda3159500c1450edf0e82"; |         gitCommit = "9abad7d31476e97842d3b42f1fc1fb03d4cf817b"; | ||||||
|       }; |       }; | ||||||
|       nanos6 = prev.nanos6.override { |       nanos6 = prev.nanos6.override { | ||||||
|         useGit = true; |         useGit = true; | ||||||
| @ -23,13 +27,27 @@ | |||||||
|         gitBranch = "master"; |         gitBranch = "master"; | ||||||
|         gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f"; |         gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f"; | ||||||
|       }; |       }; | ||||||
|  |       clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override { | ||||||
|  |         useGit = true; | ||||||
|  |         gitBranch = "master"; | ||||||
|  |         gitCommit = "9dc4a4deea5e09850435782026eaae2f5290d886"; | ||||||
|  |       }; | ||||||
|  | 
 | ||||||
|  |       # Use a fixed commit for libovni | ||||||
|  |       ovniFixed = prev.ovni.override { | ||||||
|  |         useGit = true; | ||||||
|  |         gitBranch = "master"; | ||||||
|  |         gitCommit = "68fc8b0eba299c3a7fa3833ace2c94933a26749e"; | ||||||
|  |       }; | ||||||
|       # Build with the current source |       # Build with the current source | ||||||
|       ovni = prev.ovni.overrideAttrs (old: rec { |       ovniLocal = prev.ovni.overrideAttrs (old: rec { | ||||||
|         pname = "ovni-local"; |         pname = "ovni-local"; | ||||||
|         version = if self ? shortRev then self.shortRev else "dirty"; |         version = if self ? shortRev then self.shortRev else "dirty"; | ||||||
|         src = self; |         src = self; | ||||||
|         cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ]; |         cmakeFlags = [ "-DOVNI_GIT_COMMIT=${version}" ]; | ||||||
|       }); |       }); | ||||||
|  |       # Select correct ovni for libovni | ||||||
|  |       ovni = if (useLocalOvni) then final.ovniLocal else final.ovniFixed; | ||||||
|     }; |     }; | ||||||
|     pkgs = import nixpkgs { |     pkgs = import nixpkgs { | ||||||
|       system = "x86_64-linux"; |       system = "x86_64-linux"; | ||||||
| @ -51,12 +69,12 @@ | |||||||
|     ]; |     ]; | ||||||
|     lib = pkgs.lib; |     lib = pkgs.lib; | ||||||
|   in { |   in { | ||||||
|     packages.x86_64-linux.ovniPackages = rec { |     packages.x86_64-linux.ovniPackages = { | ||||||
|  |       # Allow inspection of packages from the command line | ||||||
|  |       inherit pkgs; | ||||||
|  |     } // rec { | ||||||
|       # Build with the current source |       # Build with the current source | ||||||
|       local = pkgs.ovni.overrideAttrs (old: { |       local = pkgs.ovniLocal; | ||||||
|         pname = "ovni-local"; |  | ||||||
|         src = self; |  | ||||||
|       }); |  | ||||||
| 
 | 
 | ||||||
|       # Build in Debug mode |       # Build in Debug mode | ||||||
|       debug = local.overrideAttrs (old: { |       debug = local.overrideAttrs (old: { | ||||||
| @ -97,12 +115,13 @@ | |||||||
|         # We need to be able to exit the chroot to run Nanos6 tests, as they |         # We need to be able to exit the chroot to run Nanos6 tests, as they | ||||||
|         # require access to /sys for hwloc |         # require access to /sys for hwloc | ||||||
|         __noChroot = true; |         __noChroot = true; | ||||||
|         buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes ]); |         buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]); | ||||||
|         cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ]; |         cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ]; | ||||||
|         preConfigure = old.preConfigure or "" + '' |         preConfigure = old.preConfigure or "" + '' | ||||||
|           export NOSV_HOME="${pkgs.nosv}" |           export NOSV_HOME="${pkgs.nosv}" | ||||||
|           export NODES_HOME="${pkgs.nodes}" |           export NODES_HOME="${pkgs.nodes}" | ||||||
|           export NANOS6_HOME="${pkgs.nanos6}" |           export NANOS6_HOME="${pkgs.nanos6}" | ||||||
|  |           export OPENMP_RUNTIME="libompv" | ||||||
|         ''; |         ''; | ||||||
|       }); |       }); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -37,6 +37,7 @@ nav: | |||||||
|       - user/emulation/nanos6.md |       - user/emulation/nanos6.md | ||||||
|       - user/emulation/tampi.md |       - user/emulation/tampi.md | ||||||
|       - user/emulation/mpi.md |       - user/emulation/mpi.md | ||||||
|  |       - user/emulation/openmp.md | ||||||
|       - user/emulation/events.md |       - user/emulation/events.md | ||||||
|     - CHANGELOG.md |     - CHANGELOG.md | ||||||
|   - 'Developer guide': |   - 'Developer guide': | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| /* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 | /* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
 | ||||||
|  * SPDX-License-Identifier: GPL-3.0-or-later */ |  * SPDX-License-Identifier: GPL-3.0-or-later */ | ||||||
| 
 | 
 | ||||||
| #include "openmp_priv.h" | #include "openmp_priv.h" | ||||||
| @ -14,53 +14,83 @@ | |||||||
| enum { PUSH = 1, POP = 2, IGN = 3 }; | enum { PUSH = 1, POP = 2, IGN = 3 }; | ||||||
| 
 | 
 | ||||||
| static const int fn_table[256][256][3] = { | static const int fn_table[256][256][3] = { | ||||||
| 	['A'] = { |  | ||||||
| 		['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED }, |  | ||||||
| 		[']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED }, |  | ||||||
| 	}, |  | ||||||
| 	['B'] = { | 	['B'] = { | ||||||
| 		['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER }, | 		['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_PLAIN }, | ||||||
| 		['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER }, | 		['B'] = { CH_SUBSYSTEM, POP,  ST_BARRIER_PLAIN }, | ||||||
| 		['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER }, | 		['j'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_JOIN }, | ||||||
| 		['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER }, | 		['J'] = { CH_SUBSYSTEM, POP,  ST_BARRIER_JOIN }, | ||||||
| 		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER }, | 		['f'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_FORK }, | ||||||
| 		['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER }, | 		['F'] = { CH_SUBSYSTEM, POP,  ST_BARRIER_FORK }, | ||||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT }, | 		['t'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER_TASK }, | ||||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT }, | 		['T'] = { CH_SUBSYSTEM, POP,  ST_BARRIER_TASK }, | ||||||
|  | 		['s'] = { CH_SUBSYSTEM, IGN,  ST_BARRIER_SPIN_WAIT }, | ||||||
|  | 		['S'] = { CH_SUBSYSTEM, IGN,  ST_BARRIER_SPIN_WAIT }, | ||||||
|  | 	}, | ||||||
|  | 	['I'] = { | ||||||
|  | 		['a'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_ACQ }, | ||||||
|  | 		['A'] = { CH_SUBSYSTEM, POP,  ST_CRITICAL_ACQ }, | ||||||
|  | 		['r'] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_REL }, | ||||||
|  | 		['R'] = { CH_SUBSYSTEM, POP,  ST_CRITICAL_REL }, | ||||||
|  | 		['['] = { CH_SUBSYSTEM, PUSH, ST_CRITICAL_SECTION }, | ||||||
|  | 		[']'] = { CH_SUBSYSTEM, POP,  ST_CRITICAL_SECTION }, | ||||||
| 	}, | 	}, | ||||||
| 	['W'] = { | 	['W'] = { | ||||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC }, | 		['d'] = { CH_SUBSYSTEM, PUSH, ST_WD_DISTRIBUTE }, | ||||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC }, | 		['D'] = { CH_SUBSYSTEM, POP,  ST_WD_DISTRIBUTE }, | ||||||
| 		['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT }, | 		['c'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_CHUNK }, | ||||||
| 		['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT }, | 		['C'] = { CH_SUBSYSTEM, POP,  ST_WD_FOR_DYNAMIC_CHUNK }, | ||||||
| 		['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK }, | 		['y'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_DYNAMIC_INIT }, | ||||||
| 		['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK }, | 		['Y'] = { CH_SUBSYSTEM, POP,  ST_WD_FOR_DYNAMIC_INIT }, | ||||||
| 		['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE }, | 		['s'] = { CH_SUBSYSTEM, PUSH, ST_WD_FOR_STATIC }, | ||||||
| 		['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE }, | 		['S'] = { CH_SUBSYSTEM, POP,  ST_WD_FOR_STATIC }, | ||||||
|  | 		['e'] = { CH_SUBSYSTEM, PUSH, ST_WD_SECTION }, | ||||||
|  | 		['E'] = { CH_SUBSYSTEM, POP,  ST_WD_SECTION }, | ||||||
|  | 		['i'] = { CH_SUBSYSTEM, PUSH, ST_WD_SINGLE }, | ||||||
|  | 		['I'] = { CH_SUBSYSTEM, POP,  ST_WD_SINGLE }, | ||||||
| 	}, | 	}, | ||||||
| 	['T'] = { | 	['T'] = { | ||||||
| 		['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS }, |  | ||||||
| 		['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS }, |  | ||||||
| 		['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS }, |  | ||||||
| 		['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS }, |  | ||||||
| 		['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK }, |  | ||||||
| 		[']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK }, |  | ||||||
| 		['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 }, |  | ||||||
| 		['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 }, |  | ||||||
| 		['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC }, | 		['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC }, | ||||||
| 		['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC }, | 		['A'] = { CH_SUBSYSTEM, POP,  ST_TASK_ALLOC }, | ||||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, | 		['c'] = { CH_SUBSYSTEM, PUSH, ST_TASK_CHECK_DEPS }, | ||||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE }, | 		['C'] = { CH_SUBSYSTEM, POP,  ST_TASK_CHECK_DEPS }, | ||||||
| 		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT }, |  | ||||||
| 		['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT }, |  | ||||||
| 		['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD }, |  | ||||||
| 		['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD }, |  | ||||||
| 		['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC }, | 		['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC }, | ||||||
| 		['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC }, | 		['D'] = { CH_SUBSYSTEM, POP,  ST_TASK_DUP_ALLOC }, | ||||||
| 		['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS }, | 		['r'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RELEASE_DEPS }, | ||||||
| 		['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS }, | 		['R'] = { CH_SUBSYSTEM, POP,  ST_TASK_RELEASE_DEPS }, | ||||||
| 		['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP }, | 		['['] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN }, | ||||||
| 		['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP }, | 		[']'] = { CH_SUBSYSTEM, POP,  ST_TASK_RUN }, | ||||||
|  | 		['i'] = { CH_SUBSYSTEM, PUSH, ST_TASK_RUN_IF0 }, | ||||||
|  | 		['I'] = { CH_SUBSYSTEM, POP,  ST_TASK_RUN_IF0 }, | ||||||
|  | 		['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, | ||||||
|  | 		['S'] = { CH_SUBSYSTEM, POP,  ST_TASK_SCHEDULE }, | ||||||
|  | 		['g'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKGROUP }, | ||||||
|  | 		['G'] = { CH_SUBSYSTEM, POP,  ST_TASK_TASKGROUP }, | ||||||
|  | 		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT }, | ||||||
|  | 		['T'] = { CH_SUBSYSTEM, POP,  ST_TASK_TASKWAIT }, | ||||||
|  | 		['w'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKWAIT_DEPS }, | ||||||
|  | 		['W'] = { CH_SUBSYSTEM, POP,  ST_TASK_TASKWAIT_DEPS }, | ||||||
|  | 		['y'] = { CH_SUBSYSTEM, PUSH, ST_TASK_TASKYIELD }, | ||||||
|  | 		['Y'] = { CH_SUBSYSTEM, POP,  ST_TASK_TASKYIELD }, | ||||||
|  | 	}, | ||||||
|  | 	['A'] = { | ||||||
|  | 		['['] = { CH_SUBSYSTEM, PUSH, ST_RT_ATTACHED }, | ||||||
|  | 		[']'] = { CH_SUBSYSTEM, POP,  ST_RT_ATTACHED }, | ||||||
|  | 	}, | ||||||
|  | 	['M'] = { | ||||||
|  | 		['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_INTERNAL }, | ||||||
|  | 		['I'] = { CH_SUBSYSTEM, POP,  ST_RT_MICROTASK_INTERNAL }, | ||||||
|  | 		['u'] = { CH_SUBSYSTEM, PUSH, ST_RT_MICROTASK_USER }, | ||||||
|  | 		['U'] = { CH_SUBSYSTEM, POP,  ST_RT_MICROTASK_USER }, | ||||||
|  | 	}, | ||||||
|  | 	['H'] = { | ||||||
|  | 		['['] = { CH_SUBSYSTEM, PUSH, ST_RT_WORKER_LOOP }, | ||||||
|  | 		[']'] = { CH_SUBSYSTEM, POP,  ST_RT_WORKER_LOOP }, | ||||||
|  | 	}, | ||||||
|  | 	['C'] = { | ||||||
|  | 		['i'] = { CH_SUBSYSTEM, PUSH, ST_RT_INIT }, | ||||||
|  | 		['I'] = { CH_SUBSYSTEM, POP,  ST_RT_INIT }, | ||||||
|  | 		['f'] = { CH_SUBSYSTEM, PUSH, ST_RT_FORK_CALL }, | ||||||
|  | 		['F'] = { CH_SUBSYSTEM, POP,  ST_RT_FORK_CALL }, | ||||||
| 	}, | 	}, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| /* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 | /* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
 | ||||||
|  * SPDX-License-Identifier: GPL-3.0-or-later */ |  * SPDX-License-Identifier: GPL-3.0-or-later */ | ||||||
| 
 | 
 | ||||||
| #ifndef OPENMP_PRIV_H | #ifndef OPENMP_PRIV_H | ||||||
| @ -15,28 +15,42 @@ enum openmp_chan { | |||||||
| 	CH_MAX, | 	CH_MAX, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| enum openmp_function_values { | enum openmp_function_values { | ||||||
| 	ST_ATTACHED = 1, | 	ST_BARRIER_FORK = 1, | ||||||
| 	ST_JOIN_BARRIER, | 	ST_BARRIER_JOIN, | ||||||
| 	ST_BARRIER, | 	ST_BARRIER_PLAIN, | ||||||
| 	ST_TASKING_BARRIER, | 	ST_BARRIER_SPIN_WAIT, | ||||||
| 	ST_SPIN_WAIT, | 	ST_BARRIER_TASK, | ||||||
| 	ST_FOR_STATIC, | 	/* Critical */ | ||||||
| 	ST_FOR_DYNAMIC_INIT, | 	ST_CRITICAL_ACQ, | ||||||
| 	ST_FOR_DYNAMIC_CHUNK, | 	ST_CRITICAL_REL, | ||||||
| 	ST_SINGLE, | 	ST_CRITICAL_SECTION, | ||||||
| 	ST_RELEASE_DEPS, | 	/* Work-distribution */ | ||||||
| 	ST_TASKWAIT_DEPS, | 	ST_WD_DISTRIBUTE, | ||||||
| 	ST_INVOKE_TASK, | 	ST_WD_FOR_DYNAMIC_CHUNK, | ||||||
| 	ST_INVOKE_TASK_IF0, | 	ST_WD_FOR_DYNAMIC_INIT, | ||||||
|  | 	ST_WD_FOR_STATIC, | ||||||
|  | 	ST_WD_SECTION, | ||||||
|  | 	ST_WD_SINGLE, | ||||||
|  | 	/* Task */ | ||||||
| 	ST_TASK_ALLOC, | 	ST_TASK_ALLOC, | ||||||
| 	ST_TASK_SCHEDULE, | 	ST_TASK_CHECK_DEPS, | ||||||
| 	ST_TASKWAIT, |  | ||||||
| 	ST_TASKYIELD, |  | ||||||
| 	ST_TASK_DUP_ALLOC, | 	ST_TASK_DUP_ALLOC, | ||||||
| 	ST_CHECK_DEPS, | 	ST_TASK_RELEASE_DEPS, | ||||||
| 	ST_TASKGROUP, | 	ST_TASK_RUN, | ||||||
|  | 	ST_TASK_RUN_IF0, | ||||||
|  | 	ST_TASK_SCHEDULE, | ||||||
|  | 	ST_TASK_TASKGROUP, | ||||||
|  | 	ST_TASK_TASKWAIT, | ||||||
|  | 	ST_TASK_TASKWAIT_DEPS, | ||||||
|  | 	ST_TASK_TASKYIELD, | ||||||
|  | 	/* Runtime */ | ||||||
|  | 	ST_RT_ATTACHED, | ||||||
|  | 	ST_RT_FORK_CALL, | ||||||
|  | 	ST_RT_INIT, | ||||||
|  | 	ST_RT_MICROTASK_INTERNAL, | ||||||
|  | 	ST_RT_MICROTASK_USER, | ||||||
|  | 	ST_RT_WORKER_LOOP, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct openmp_thread { | struct openmp_thread { | ||||||
|  | |||||||
| @ -26,29 +26,44 @@ static const char model_name[] = "openmp"; | |||||||
| enum { model_id = 'P' }; | enum { model_id = 'P' }; | ||||||
| 
 | 
 | ||||||
| static struct ev_decl model_evlist[] = { | static struct ev_decl model_evlist[] = { | ||||||
| 	PAIR_E("PA[", "PA]", "the attached state") | 	PAIR_B("PBb", "PBB", "plain barrier") | ||||||
|  | 	PAIR_B("PBj", "PBJ", "join barrier") | ||||||
|  | 	PAIR_B("PBf", "PBF", "fork barrier") | ||||||
|  | 	PAIR_B("PBt", "PBT", "tasking barrier") | ||||||
|  | 	PAIR_B("PBs", "PBS", "spin wait") | ||||||
| 
 | 
 | ||||||
| 	PAIR_E("PBj", "PBJ", "a join barrier") | 	PAIR_B("PIa", "PIA", "critical acquiring") | ||||||
| 	PAIR_E("PBb", "PBB", "a barrier") | 	PAIR_B("PIr", "PIR", "critical releasing") | ||||||
| 	PAIR_E("PBt", "PBT", "a tasking barrier") | 	PAIR_B("PI[", "PI]", "critical section") | ||||||
| 	PAIR_E("PBs", "PBS", "a spin wait") |  | ||||||
| 
 | 
 | ||||||
| 	PAIR_B("PWs", "PWS", "static for") | 	PAIR_B("PWd", "PWD", "distribute") | ||||||
| 	PAIR_B("PWd", "PWD", "dynamic for init") | 	PAIR_B("PWy", "PWY", "dynamic for init") | ||||||
| 	PAIR_B("PWc", "PWC", "dynamic for chunk") | 	PAIR_B("PWc", "PWC", "dynamic for chunk") | ||||||
|  | 	PAIR_B("PWs", "PWS", "static for") | ||||||
|  | 	PAIR_B("PWe", "PWE", "section") | ||||||
| 	PAIR_B("PWi", "PWI", "single") | 	PAIR_B("PWi", "PWI", "single") | ||||||
| 
 | 
 | ||||||
| 	PAIR_B("PTr", "PTR", "releasing task dependencies") |  | ||||||
| 	PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") |  | ||||||
| 	PAIR_B("PT[", "PT]", "invoking a task") |  | ||||||
| 	PAIR_B("PTi", "PTI", "invoking an if0 task") |  | ||||||
| 	PAIR_B("PTa", "PTA", "task allocation") | 	PAIR_B("PTa", "PTA", "task allocation") | ||||||
| 	PAIR_B("PTs", "PTS", "scheduling a task") |  | ||||||
| 	PAIR_E("PTt", "PTT", "a taskwait") |  | ||||||
| 	PAIR_E("PTy", "PTY", "a taskyield") |  | ||||||
| 	PAIR_B("PTd", "PTD", "duplicating a task") |  | ||||||
| 	PAIR_B("PTc", "PTC", "checking task dependencies") | 	PAIR_B("PTc", "PTC", "checking task dependencies") | ||||||
| 	PAIR_E("PTg", "PTG", "a taskgroup") | 	PAIR_B("PTd", "PTD", "duplicating a task") | ||||||
|  | 	PAIR_B("PTr", "PTR", "releasing task dependencies") | ||||||
|  | 	PAIR_B("PT[", "PT]", "running a task") | ||||||
|  | 	PAIR_B("PTi", "PTI", "running an if0 task") | ||||||
|  | 	PAIR_B("PTs", "PTS", "scheduling a task") | ||||||
|  | 	PAIR_B("PTg", "PTG", "a taskgroup") | ||||||
|  | 	PAIR_B("PTt", "PTT", "a taskwait") | ||||||
|  | 	PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") | ||||||
|  | 	PAIR_B("PTy", "PTY", "a taskyield") | ||||||
|  | 
 | ||||||
|  | 	PAIR_E("PA[", "PA]", "the attached state") | ||||||
|  | 
 | ||||||
|  | 	PAIR_B("PMi", "PMI", "microtask internal") | ||||||
|  | 	PAIR_B("PMu", "PMU", "microtask user code") | ||||||
|  | 
 | ||||||
|  | 	PAIR_B("PH[", "PH]", "worker loop") | ||||||
|  | 
 | ||||||
|  | 	PAIR_B("PCf", "PCF", "fork call") | ||||||
|  | 	PAIR_B("PCi", "PCI", "initialization") | ||||||
| 
 | 
 | ||||||
| 	{ NULL, NULL }, | 	{ NULL, NULL }, | ||||||
| }; | }; | ||||||
| @ -75,6 +90,10 @@ static const int chan_stack[CH_MAX] = { | |||||||
| 	[CH_SUBSYSTEM] = 1, | 	[CH_SUBSYSTEM] = 1, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static const int chan_dup[CH_MAX] = { | ||||||
|  | 	[CH_SUBSYSTEM] = 1, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /* ----------------- pvt ------------------ */ | /* ----------------- pvt ------------------ */ | ||||||
| 
 | 
 | ||||||
| static const int pvt_type[CH_MAX] = { | static const int pvt_type[CH_MAX] = { | ||||||
| @ -86,26 +105,42 @@ static const char *pcf_prefix[CH_MAX] = { | |||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct pcf_value_label openmp_subsystem_values[] = { | static const struct pcf_value_label openmp_subsystem_values[] = { | ||||||
| 	{ ST_ATTACHED,          "Attached" }, | 	/* Work-distribution */ | ||||||
| 	{ ST_JOIN_BARRIER,      "Join barrier" }, | 	{ ST_WD_DISTRIBUTE,          "Work-distribution: Distribute" }, | ||||||
| 	{ ST_BARRIER,           "Barrier" }, | 	{ ST_WD_FOR_DYNAMIC_CHUNK,   "Work-distribution: Dynamic for chunk" }, | ||||||
| 	{ ST_TASKING_BARRIER,   "Tasking barrier" }, | 	{ ST_WD_FOR_DYNAMIC_INIT,    "Work-distribution: Dynamic for initialization" }, | ||||||
| 	{ ST_SPIN_WAIT,         "Spin wait" }, | 	{ ST_WD_FOR_STATIC,          "Work-distribution: Static for chunk" }, | ||||||
| 	{ ST_FOR_STATIC,        "For static" }, | 	{ ST_WD_SECTION,             "Work-distribution: Section" }, | ||||||
| 	{ ST_FOR_DYNAMIC_INIT,  "For dynamic init" }, | 	{ ST_WD_SINGLE,              "Work-distribution: Single" }, | ||||||
| 	{ ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" }, | 	/* Task */ | ||||||
| 	{ ST_SINGLE,            "Single" }, | 	{ ST_TASK_ALLOC,             "Task: Allocation" }, | ||||||
| 	{ ST_RELEASE_DEPS,      "Release deps" }, | 	{ ST_TASK_CHECK_DEPS,        "Task: Check deps" }, | ||||||
| 	{ ST_TASKWAIT_DEPS,     "Taskwait deps" }, | 	{ ST_TASK_DUP_ALLOC,         "Task: Duplicating" }, | ||||||
| 	{ ST_INVOKE_TASK,       "Invoke task" }, | 	{ ST_TASK_RELEASE_DEPS,      "Task: Releasing deps" }, | ||||||
| 	{ ST_INVOKE_TASK_IF0,   "Invoke task if0" }, | 	{ ST_TASK_RUN,               "Task: Running task" }, | ||||||
| 	{ ST_TASK_ALLOC,        "Task alloc" }, | 	{ ST_TASK_RUN_IF0,           "Task: Running task if0" }, | ||||||
| 	{ ST_TASK_SCHEDULE,     "Task schedule" }, | 	{ ST_TASK_SCHEDULE,          "Task: Scheduling" }, | ||||||
| 	{ ST_TASKWAIT,          "Taskwait" }, | 	{ ST_TASK_TASKGROUP,         "Task: Taskgroup" }, | ||||||
| 	{ ST_TASKYIELD,         "Taskyield" }, | 	{ ST_TASK_TASKWAIT,          "Task: Taskwait" }, | ||||||
| 	{ ST_TASK_DUP_ALLOC,    "Task dup alloc" }, | 	{ ST_TASK_TASKWAIT_DEPS,     "Task: Taskwait deps" }, | ||||||
| 	{ ST_CHECK_DEPS,        "Check deps" }, | 	{ ST_TASK_TASKYIELD,         "Task: Taskyield" }, | ||||||
| 	{ ST_TASKGROUP,         "Taskgroup" }, | 	/* Critical */ | ||||||
|  | 	{ ST_CRITICAL_ACQ,           "Critical: Acquiring" }, | ||||||
|  | 	{ ST_CRITICAL_REL,           "Critical: Releasing" }, | ||||||
|  | 	{ ST_CRITICAL_SECTION,       "Critical: Section" }, | ||||||
|  | 	/* Barrier */ | ||||||
|  | 	{ ST_BARRIER_FORK,           "Barrier: Fork" }, | ||||||
|  | 	{ ST_BARRIER_JOIN,           "Barrier: Join" }, | ||||||
|  | 	{ ST_BARRIER_PLAIN,          "Barrier: Plain" }, | ||||||
|  | 	{ ST_BARRIER_TASK,           "Barrier: Task" }, | ||||||
|  | 	{ ST_BARRIER_SPIN_WAIT,      "Barrier: Spin wait" }, | ||||||
|  | 	/* Runtime */ | ||||||
|  | 	{ ST_RT_ATTACHED,            "Runtime: Attached" }, | ||||||
|  | 	{ ST_RT_FORK_CALL,           "Runtime: Fork call" }, | ||||||
|  | 	{ ST_RT_INIT,                "Runtime: Initialization" }, | ||||||
|  | 	{ ST_RT_MICROTASK_INTERNAL,  "Runtime: Internal microtask" }, | ||||||
|  | 	{ ST_RT_MICROTASK_USER,      "Runtime: User microtask" }, | ||||||
|  | 	{ ST_RT_WORKER_LOOP,         "Runtime: Worker main loop" }, | ||||||
| 	{ -1, NULL }, | 	{ -1, NULL }, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| @ -114,7 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = { | |||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const long prv_flags[CH_MAX] = { | static const long prv_flags[CH_MAX] = { | ||||||
| 	[CH_SUBSYSTEM] = PRV_SKIPDUP, | 	[CH_SUBSYSTEM] = PRV_EMITDUP, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct model_pvt_spec pvt_spec = { | static const struct model_pvt_spec pvt_spec = { | ||||||
| @ -127,7 +162,7 @@ static const struct model_pvt_spec pvt_spec = { | |||||||
| /* ----------------- tracking ------------------ */ | /* ----------------- tracking ------------------ */ | ||||||
| 
 | 
 | ||||||
| static const int th_track[CH_MAX] = { | static const int th_track[CH_MAX] = { | ||||||
| 	[CH_SUBSYSTEM] = TRACK_TH_RUN, | 	[CH_SUBSYSTEM] = TRACK_TH_ACT, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const int cpu_track[CH_MAX] = { | static const int cpu_track[CH_MAX] = { | ||||||
| @ -141,6 +176,7 @@ static const struct model_chan_spec th_chan = { | |||||||
| 	.prefix = model_name, | 	.prefix = model_name, | ||||||
| 	.ch_names = chan_name, | 	.ch_names = chan_name, | ||||||
| 	.ch_stack = chan_stack, | 	.ch_stack = chan_stack, | ||||||
|  | 	.ch_dup = chan_dup, | ||||||
| 	.pvt = &pvt_spec, | 	.pvt = &pvt_spec, | ||||||
| 	.track = th_track, | 	.track = th_track, | ||||||
| }; | }; | ||||||
| @ -150,6 +186,7 @@ static const struct model_chan_spec cpu_chan = { | |||||||
| 	.prefix = model_name, | 	.prefix = model_name, | ||||||
| 	.ch_names = chan_name, | 	.ch_names = chan_name, | ||||||
| 	.ch_stack = chan_stack, | 	.ch_stack = chan_stack, | ||||||
|  | 	.ch_dup = chan_dup, | ||||||
| 	.pvt = &pvt_spec, | 	.pvt = &pvt_spec, | ||||||
| 	.track = cpu_track, | 	.track = cpu_track, | ||||||
| }; | }; | ||||||
|  | |||||||
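For orientation, each PAIR_B/PAIR_E entry above declares a begin/end marker pair ("PBb"/"PBB", "PT["/"PT]", ...) that the instrumented runtime emits around the corresponding region, and the emulator maps each pair onto one of the subsystem states listed above. The following sketch is illustrative only: it assumes the public ovni_ev_set_clock, ovni_ev_set_mcv and ovni_ev_emit calls from ovni.h, assumes ovni_proc_init() and ovni_thread_init() have already run on the calling thread, and the helper names (emit_openmp_ev, instr_plain_barrier) are made up for this example rather than taken from the OpenMP-V runtime.

#include <ovni.h>
#include <stdint.h>
#include <time.h>

/* Hypothetical helper: emit one OpenMP model ('P') event such as "PBb"
 * (enter plain barrier) or "PBB" (leave plain barrier). */
static void emit_openmp_ev(const char *mcv)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);

	struct ovni_ev ev = {0};
	ovni_ev_set_clock(&ev, (uint64_t) ts.tv_sec * 1000000000ULL
			+ (uint64_t) ts.tv_nsec);
	ovni_ev_set_mcv(&ev, mcv);
	ovni_ev_emit(&ev);
}

/* Wrapping a plain barrier with the PBb/PBB pair would show the
 * ST_BARRIER_PLAIN state in the subsystem channel while inside it. */
static void instr_plain_barrier(void (*barrier_fn)(void))
{
	emit_openmp_ev("PBb");
	barrier_fn();
	emit_openmp_ev("PBB");
}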
| @ -4,3 +4,4 @@ | |||||||
| add_subdirectory(nanos6) | add_subdirectory(nanos6) | ||||||
| add_subdirectory(nodes) | add_subdirectory(nodes) | ||||||
| add_subdirectory(nosv) | add_subdirectory(nosv) | ||||||
|  | add_subdirectory(openmp) | ||||||
|  | |||||||

test/rt/openmp/CMakeLists.txt (new file, 50 lines)
							| @ -0,0 +1,50 @@ | |||||||
|  | # Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC) | ||||||
|  | # SPDX-License-Identifier: GPL-3.0-or-later | ||||||
|  | 
 | ||||||
|  | check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND) | ||||||
|  | check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND) | ||||||
|  | cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH) | ||||||
|  | 
 | ||||||
|  | if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND) | ||||||
|  |   if(ENABLE_ALL_TESTS) | ||||||
|  |     message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests") | ||||||
|  |   else() | ||||||
|  |     message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests") | ||||||
|  |   endif() | ||||||
|  |   return() | ||||||
|  | endif() | ||||||
|  | 
 | ||||||
|  | function(openmp_rt_test) | ||||||
|  |   ovni_test(${ARGN}) | ||||||
|  |   target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv" | ||||||
|  | 	  "-no-pedantic") | ||||||
|  |   target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv") | ||||||
|  |   target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m") | ||||||
|  |   set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY | ||||||
|  |     ENVIRONMENT "OMP_OVNI=1") | ||||||
|  |   set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY | ||||||
|  |     ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni") | ||||||
|  | endfunction() | ||||||
|  | 
 | ||||||
|  | openmp_rt_test(barrier-explicit.c) | ||||||
|  | openmp_rt_test(critical.c) | ||||||
|  | openmp_rt_test(if0-nested-task.c) | ||||||
|  | openmp_rt_test(if0.c) | ||||||
|  | openmp_rt_test(multi-parallels.c) | ||||||
|  | openmp_rt_test(parallel-for.c) | ||||||
|  | openmp_rt_test(parallel-loop.c) | ||||||
|  | openmp_rt_test(parallel-nested.c) | ||||||
|  | openmp_rt_test(parallel-task.c) | ||||||
|  | openmp_rt_test(sections.c) | ||||||
|  | openmp_rt_test(simple-task.c) | ||||||
|  | openmp_rt_test(task.c) | ||||||
|  | openmp_rt_test(taskloop.c) | ||||||
|  | openmp_rt_test(taskwait.c) | ||||||
|  | openmp_rt_test(team-distribute.c) | ||||||
|  | openmp_rt_test(worksharing-and-tasks.c) | ||||||
|  | openmp_rt_test(worksharing-mix.c) | ||||||
|  | openmp_rt_test(worksharing-task.c) | ||||||
|  | openmp_rt_test(worksharing.c) | ||||||
|  | openmp_rt_test(worksharing01.c) | ||||||
|  | openmp_rt_test(worksharing02.c) | ||||||
|  | openmp_rt_test(worksharing03.c) | ||||||

test/rt/openmp/barrier-explicit.c (new file, 47 lines)
							| @ -0,0 +1,47 @@ | |||||||
|  | #include <stdio.h> | ||||||
|  | #include <math.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | #define N 100 | ||||||
|  | 
 | ||||||
|  | static void | ||||||
|  | dummy_work(double *x, int i) | ||||||
|  | { | ||||||
|  | 	sleep_us(i); | ||||||
|  | 	x[i] += sqrt((double) i); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	double x[N] = { 0 }; | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp single | ||||||
|  | 		{ | ||||||
|  | 			for (int i = 0; i < N; i++) { | ||||||
|  | 				#pragma omp task shared(x) | ||||||
|  | 				dummy_work(x, i); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		sleep_us(200); | ||||||
|  | 		#pragma omp barrier | ||||||
|  | 		sleep_us(1000); | ||||||
|  | 		#pragma omp barrier | ||||||
|  | 
 | ||||||
|  | 		#pragma omp single | ||||||
|  | 		{ | ||||||
|  | 			for (int i = 0; i < N; i++) { | ||||||
|  | 				#pragma omp task shared(x) | ||||||
|  | 				dummy_work(x, i); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	double sum = 0.0; | ||||||
|  | 	for (int i = 0; i < N; i++) | ||||||
|  | 		sum += x[i]; | ||||||
|  | 
 | ||||||
|  | 	printf("sum = %e\n", sum); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/critical.c (new file, 16 lines)
							| @ -0,0 +1,16 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		sleep_us(1000); | ||||||
|  | 
 | ||||||
|  | 		#pragma omp critical | ||||||
|  | 		sleep_us(200); | ||||||
|  | 
 | ||||||
|  | 		sleep_us(1000); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/if0-nested-task.c (new file, 20 lines)
							| @ -0,0 +1,20 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp single | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task if(0) | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp task | ||||||
|  | 			{ | ||||||
|  | 				sleep_us(1000); | ||||||
|  | 			} | ||||||
|  | 			#pragma omp taskwait | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||

test/rt/openmp/if0.c (new file, 17 lines)
							| @ -0,0 +1,17 @@ | |||||||
|  | #include <omp.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp single | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task if(0) | ||||||
|  | 		{ | ||||||
|  | 			sleep_us(1000); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||

test/rt/openmp/multi-parallels.c (new file, 13 lines)
							| @ -0,0 +1,13 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	for (int i = 0; i < 10; i++) { | ||||||
|  | 		#pragma omp parallel | ||||||
|  | 		{ | ||||||
|  | 			sleep_us(1000); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/parallel-for.c (new file, 29 lines)
							| @ -0,0 +1,29 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp for | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp for schedule(dynamic, 1) | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(i); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp for | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp for schedule(dynamic, 1) | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(i); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/parallel-loop.c (new file, 14 lines)
							| @ -0,0 +1,14 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp loop | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/parallel-nested.c (new file, 22 lines)
							| @ -0,0 +1,22 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp for schedule(dynamic, 1) | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(i); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp parallel | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp for schedule(dynamic, 1) | ||||||
|  | 			for (int i = 0; i < 100; i++) { | ||||||
|  | 				sleep_us(i); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/parallel-task.c (new file, 26 lines)
							| @ -0,0 +1,26 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | static void foo(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp for schedule(dynamic, 1) | ||||||
|  | 	for (int i = 0; i < 100; i++) | ||||||
|  | 		sleep_us(i); | ||||||
|  | 
 | ||||||
|  | 	#pragma omp single | ||||||
|  | 	for (int i = 0; i < 100; i++) | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task | ||||||
|  | 		sleep_us(10); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		foo(); | ||||||
|  | 		foo(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/sections.c (new file, 51 lines)
							| @ -0,0 +1,51 @@ | |||||||
|  | #include <stdio.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel sections | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp section | ||||||
|  | 		{ sleep_us(1001); printf("1001\n"); } | ||||||
|  | 		#pragma omp section | ||||||
|  | 		{ sleep_us(1002); printf("1002\n"); } | ||||||
|  | 		#pragma omp section | ||||||
|  | 		{ sleep_us(1003); printf("1003\n"); } | ||||||
|  | 		#pragma omp section | ||||||
|  | 		{ sleep_us(1004); printf("1004\n"); } | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1005); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1006); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1007); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1008); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1009); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1010); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1011); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1012); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1013); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1014); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1015); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1016); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1017); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1018); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1019); | ||||||
|  | 		#pragma omp section | ||||||
|  | 		sleep_us(1020); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/simple-task.c (new file, 23 lines)
							| @ -0,0 +1,23 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	int a; | ||||||
|  | 	int *p = &a; | ||||||
|  | 
 | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp single | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task depend(out : p[0]) | ||||||
|  | 		{ | ||||||
|  | 			sleep_us(1000); | ||||||
|  | 		} | ||||||
|  | 		for (int i = 0; i < 10000; i++) | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp task depend(in : p[0]) | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/task.c (new file, 19 lines)
							| @ -0,0 +1,19 @@ | |||||||
|  | #include <stdio.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		for (int i = 0; i < 10; i++) { | ||||||
|  | 			#pragma omp task | ||||||
|  | 			{ | ||||||
|  | 				printf("%d\n", i); | ||||||
|  | 				sleep_us(100); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		#pragma omp barrier | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/taskloop.c (new file, 17 lines)
							| @ -0,0 +1,17 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp single | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp taskloop | ||||||
|  | 		for (int i = 0; i < 10000; i++) | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp task | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/taskwait.c (new file, 42 lines)
							| @ -0,0 +1,42 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | #include <stdio.h> | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp single | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task label("A") | ||||||
|  | 		{ | ||||||
|  | 			sleep_us(5000); | ||||||
|  | 			printf("A\n"); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp task label("B") | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp task label("B1") | ||||||
|  | 			{ | ||||||
|  | 				sleep_us(2000); | ||||||
|  | 				printf("B1\n"); | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			/* Shouldn't wait for task A */ | ||||||
|  | 			#pragma omp taskwait | ||||||
|  | 
 | ||||||
|  | 			#pragma omp task | ||||||
|  | 			{ | ||||||
|  | 				sleep_us(1000); | ||||||
|  | 				printf("B2\n"); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp task label("C") | ||||||
|  | 		{ | ||||||
|  | 			printf("C\n"); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Expected output C B1 B2 A */ | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/team-distribute.c (new file, 14 lines)
							| @ -0,0 +1,14 @@ | |||||||
|  | #include <omp.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp teams num_teams(2) | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp distribute parallel for | ||||||
|  | 		for (volatile int i = 0; i < 1000; i++) | ||||||
|  | 			sleep_us(100 + i); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing-and-tasks.c (new file, 34 lines)
							| @ -0,0 +1,34 @@ | |||||||
|  | #include <stdio.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		//#pragma omp single nowait
 | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			#pragma omp task | ||||||
|  | 			sleep_us(10); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		/* Wait a bit for task allocation */ | ||||||
|  | 		sleep_us(1000); | ||||||
|  | 
 | ||||||
|  | 		/* Occupy 4 CPUs with sections */ | ||||||
|  | 		#pragma omp sections nowait | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(1001); printf("1001\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(1002); printf("1002\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(1003); printf("1003\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(1004); printf("1004\n"); } | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp taskwait | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing-mix.c (new file, 66 lines)
							| @ -0,0 +1,66 @@ | |||||||
|  | #include <omp.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | /* Test several work-distribution and task constructs, so we can generate a
 | ||||||
|  |  * trace that includes most of the states. */ | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp for | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp sections | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(101); printf("101\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(102); printf("102\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(103); printf("103\n"); } | ||||||
|  | 			#pragma omp section | ||||||
|  | 			{ sleep_us(104); printf("104\n"); } | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp for | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		#pragma omp single | ||||||
|  | 		for (int i = 0; i < 100; i++) | ||||||
|  | 		{ | ||||||
|  | 			#pragma omp task | ||||||
|  | 			sleep_us(10); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp critical | ||||||
|  | 		sleep_us(20); | ||||||
|  | 
 | ||||||
|  | 		#pragma omp barrier | ||||||
|  | 
 | ||||||
|  | 		#pragma omp for | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 		#pragma omp for schedule(dynamic, 1) | ||||||
|  | 		for (int i = 0; i < 100; i++) { | ||||||
|  | 			sleep_us(i); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// FIXME: Crashes OpenMP-V runtime
 | ||||||
|  | 	//#pragma omp distribute parallel for
 | ||||||
|  | 	//for (int i = 0; i < 1000; i++) {
 | ||||||
|  | 	//	sleep_us(1);
 | ||||||
|  | 	//}
 | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing-task.c (new file, 25 lines)
							| @ -0,0 +1,25 @@ | |||||||
|  | #include <omp.h> | ||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | static void foo(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp for | ||||||
|  | 	for (int i = 0; i < 100; ++i) | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp task | ||||||
|  | 		{ | ||||||
|  | 			sleep_us(1); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		foo(); | ||||||
|  | 		foo(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing.c (new file, 16 lines)
							| @ -0,0 +1,16 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	{ | ||||||
|  | 		#pragma omp for schedule(dynamic) ordered label("omp for dynamic") | ||||||
|  | 		for (int i = 0; i < 100; i++) | ||||||
|  | 			sleep_us(100); | ||||||
|  | 
 | ||||||
|  | 		#pragma omp single label("single") | ||||||
|  | 			sleep_us(1000); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing01.c (new file, 11 lines)
							| @ -0,0 +1,11 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp for schedule(static, 30) | ||||||
|  | 	for (int i = 0; i < 100; i++) | ||||||
|  | 		sleep_us(10); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing02.c (new file, 12 lines)
							| @ -0,0 +1,12 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp target | ||||||
|  | 	#pragma omp teams num_teams(1) | ||||||
|  | 	#pragma omp distribute dist_schedule(static, 30) | ||||||
|  | 	for (int i = 0; i < 100; i++) | ||||||
|  | 		sleep_us(10); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||

test/rt/openmp/worksharing03.c (new file, 11 lines)
							| @ -0,0 +1,11 @@ | |||||||
|  | #include "compat.h" | ||||||
|  | 
 | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	#pragma omp parallel | ||||||
|  | 	#pragma omp for schedule(dynamic) | ||||||
|  | 	for (int i = 0; i < 100; i++) | ||||||
|  | 		sleep_us(10); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||