Add OpenMP emulation model
For now it only has support for subsystems Co-authored-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
This commit is contained in:
		
							parent
							
								
									454504d0cc
								
							
						
					
					
						commit
						bf2b3b73a0
					
				
							
								
								
									
										42
									
								
								cfg/cpu/openmp/subsystem.cfg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								cfg/cpu/openmp/subsystem.cfg
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | ||||
| #ParaverCFG | ||||
| ConfigFile.Version: 3.4 | ||||
| ConfigFile.NumWindows: 1 | ||||
| 
 | ||||
| 
 | ||||
| ################################################################################ | ||||
| < NEW DISPLAYING WINDOW CPU: OpenMP subsystem of the RUNNING thread > | ||||
| ################################################################################ | ||||
| window_name CPU: OpenMP subsystem of the RUNNING thread | ||||
| window_type single | ||||
| window_id 1 | ||||
| window_position_x 0 | ||||
| window_position_y 0 | ||||
| window_width 600 | ||||
| window_height 150 | ||||
| window_comm_lines_enabled true | ||||
| window_flags_enabled false | ||||
| window_noncolor_mode true | ||||
| window_logical_filtered true | ||||
| window_physical_filtered false | ||||
| window_comm_fromto true | ||||
| window_comm_tagsize true | ||||
| window_comm_typeval true | ||||
| window_units Microseconds | ||||
| window_maximum_y 1000.0 | ||||
| window_minimum_y 1.0 | ||||
| window_compute_y_max true | ||||
| window_level thread | ||||
| window_scale_relative 1.000000000000 | ||||
| window_end_time_relative 1.000000000000 | ||||
| window_object appl { 1, { All } } | ||||
| window_begin_time_relative 0.000000000000 | ||||
| window_open true | ||||
| window_drawmode draw_randnotzero | ||||
| window_drawmode_rows draw_randnotzero | ||||
| window_pixel_size 1 | ||||
| window_labels_to_draw 1 | ||||
| window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } | ||||
| window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } | ||||
| window_filter_module evt_type 1 50 | ||||
| window_filter_module evt_type_label 1 "CPU: OpenMP subsystem of the RUNNING thread" | ||||
| 
 | ||||
							
								
								
									
										42
									
								
								cfg/thread/openmp/subsystem.cfg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								cfg/thread/openmp/subsystem.cfg
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | ||||
| #ParaverCFG | ||||
| ConfigFile.Version: 3.4 | ||||
| ConfigFile.NumWindows: 1 | ||||
| 
 | ||||
| 
 | ||||
| ################################################################################ | ||||
| < NEW DISPLAYING WINDOW Thread: OpenMP subsystem of the RUNNING thread > | ||||
| ################################################################################ | ||||
| window_name Thread: OpenMP subsystem of the RUNNING thread | ||||
| window_type single | ||||
| window_id 1 | ||||
| window_position_x 0 | ||||
| window_position_y 0 | ||||
| window_width 600 | ||||
| window_height 150 | ||||
| window_comm_lines_enabled true | ||||
| window_flags_enabled false | ||||
| window_noncolor_mode true | ||||
| window_logical_filtered true | ||||
| window_physical_filtered false | ||||
| window_comm_fromto true | ||||
| window_comm_tagsize true | ||||
| window_comm_typeval true | ||||
| window_units Microseconds | ||||
| window_maximum_y 1000.0 | ||||
| window_minimum_y 1.0 | ||||
| window_compute_y_max true | ||||
| window_level thread | ||||
| window_scale_relative 1.000000000000 | ||||
| window_end_time_relative 1.000000000000 | ||||
| window_object appl { 1, { All } } | ||||
| window_begin_time_relative 0.000000000000 | ||||
| window_open true | ||||
| window_drawmode draw_randnotzero | ||||
| window_drawmode_rows draw_randnotzero | ||||
| window_pixel_size 1 | ||||
| window_labels_to_draw 1 | ||||
| window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } | ||||
| window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } | ||||
| window_filter_module evt_type 1 50 | ||||
| window_filter_module evt_type_label 1 "Thread: OpenMP subsystem of the RUNNING thread" | ||||
| 
 | ||||
| @ -429,6 +429,92 @@ List of events for the model *ovni* with identifier **`O`** at version `1.0.0`: | ||||
| <dd>leaves unordered event region</dd> | ||||
| </dl> | ||||
| 
 | ||||
| ## Model openmp | ||||
| 
 | ||||
| List of events for the model *openmp* with identifier **`P`** at version `1.1.0`: | ||||
| <dl> | ||||
| <dt><a id="PA[" href="#PA["><pre>PA[</pre></a></dt> | ||||
| <dd>enters the attached state</dd> | ||||
| <dt><a id="PA]" href="#PA]"><pre>PA]</pre></a></dt> | ||||
| <dd>leaves the attached state</dd> | ||||
| <dt><a id="PBj" href="#PBj"><pre>PBj</pre></a></dt> | ||||
| <dd>enters a join barrier</dd> | ||||
| <dt><a id="PBJ" href="#PBJ"><pre>PBJ</pre></a></dt> | ||||
| <dd>leaves a join barrier</dd> | ||||
| <dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt> | ||||
| <dd>enters a barrier</dd> | ||||
| <dt><a id="PBB" href="#PBB"><pre>PBB</pre></a></dt> | ||||
| <dd>leaves a barrier</dd> | ||||
| <dt><a id="PBt" href="#PBt"><pre>PBt</pre></a></dt> | ||||
| <dd>enters a tasking barrier</dd> | ||||
| <dt><a id="PBT" href="#PBT"><pre>PBT</pre></a></dt> | ||||
| <dd>leaves a tasking barrier</dd> | ||||
| <dt><a id="PBs" href="#PBs"><pre>PBs</pre></a></dt> | ||||
| <dd>enters a spin wait</dd> | ||||
| <dt><a id="PBS" href="#PBS"><pre>PBS</pre></a></dt> | ||||
| <dd>leaves a spin wait</dd> | ||||
| <dt><a id="PWs" href="#PWs"><pre>PWs</pre></a></dt> | ||||
| <dd>begins static for</dd> | ||||
| <dt><a id="PWS" href="#PWS"><pre>PWS</pre></a></dt> | ||||
| <dd>ceases static for</dd> | ||||
| <dt><a id="PWd" href="#PWd"><pre>PWd</pre></a></dt> | ||||
| <dd>begins dynamic for init</dd> | ||||
| <dt><a id="PWD" href="#PWD"><pre>PWD</pre></a></dt> | ||||
| <dd>ceases dynamic for init</dd> | ||||
| <dt><a id="PWc" href="#PWc"><pre>PWc</pre></a></dt> | ||||
| <dd>begins dynamic for chunk</dd> | ||||
| <dt><a id="PWC" href="#PWC"><pre>PWC</pre></a></dt> | ||||
| <dd>ceases dynamic for chunk</dd> | ||||
| <dt><a id="PWi" href="#PWi"><pre>PWi</pre></a></dt> | ||||
| <dd>begins single</dd> | ||||
| <dt><a id="PWI" href="#PWI"><pre>PWI</pre></a></dt> | ||||
| <dd>ceases single</dd> | ||||
| <dt><a id="PTr" href="#PTr"><pre>PTr</pre></a></dt> | ||||
| <dd>begins releasing task dependencies</dd> | ||||
| <dt><a id="PTR" href="#PTR"><pre>PTR</pre></a></dt> | ||||
| <dd>ceases releasing task dependencies</dd> | ||||
| <dt><a id="PTw" href="#PTw"><pre>PTw</pre></a></dt> | ||||
| <dd>begins waiting for taskwait dependencies</dd> | ||||
| <dt><a id="PTW" href="#PTW"><pre>PTW</pre></a></dt> | ||||
| <dd>ceases waiting for taskwait dependencies</dd> | ||||
| <dt><a id="PT[" href="#PT["><pre>PT[</pre></a></dt> | ||||
| <dd>begins invoking a task</dd> | ||||
| <dt><a id="PT]" href="#PT]"><pre>PT]</pre></a></dt> | ||||
| <dd>ceases invoking a task</dd> | ||||
| <dt><a id="PTi" href="#PTi"><pre>PTi</pre></a></dt> | ||||
| <dd>begins invoking an if0 task</dd> | ||||
| <dt><a id="PTI" href="#PTI"><pre>PTI</pre></a></dt> | ||||
| <dd>ceases invoking an if0 task</dd> | ||||
| <dt><a id="PTa" href="#PTa"><pre>PTa</pre></a></dt> | ||||
| <dd>begins task allocation</dd> | ||||
| <dt><a id="PTA" href="#PTA"><pre>PTA</pre></a></dt> | ||||
| <dd>ceases task allocation</dd> | ||||
| <dt><a id="PTs" href="#PTs"><pre>PTs</pre></a></dt> | ||||
| <dd>begins scheduling a task</dd> | ||||
| <dt><a id="PTS" href="#PTS"><pre>PTS</pre></a></dt> | ||||
| <dd>ceases scheduling a task</dd> | ||||
| <dt><a id="PTt" href="#PTt"><pre>PTt</pre></a></dt> | ||||
| <dd>enters a taskwait</dd> | ||||
| <dt><a id="PTT" href="#PTT"><pre>PTT</pre></a></dt> | ||||
| <dd>leaves a taskwait</dd> | ||||
| <dt><a id="PTy" href="#PTy"><pre>PTy</pre></a></dt> | ||||
| <dd>enters a taskyield</dd> | ||||
| <dt><a id="PTY" href="#PTY"><pre>PTY</pre></a></dt> | ||||
| <dd>leaves a taskyield</dd> | ||||
| <dt><a id="PTd" href="#PTd"><pre>PTd</pre></a></dt> | ||||
| <dd>begins duplicating a task</dd> | ||||
| <dt><a id="PTD" href="#PTD"><pre>PTD</pre></a></dt> | ||||
| <dd>ceases duplicating a task</dd> | ||||
| <dt><a id="PTc" href="#PTc"><pre>PTc</pre></a></dt> | ||||
| <dd>begins checking task dependencies</dd> | ||||
| <dt><a id="PTC" href="#PTC"><pre>PTC</pre></a></dt> | ||||
| <dd>ceases checking task dependencies</dd> | ||||
| <dt><a id="PTg" href="#PTg"><pre>PTg</pre></a></dt> | ||||
| <dd>enters a taskgroup</dd> | ||||
| <dt><a id="PTG" href="#PTG"><pre>PTG</pre></a></dt> | ||||
| <dd>leaves a taskgroup</dd> | ||||
| </dl> | ||||
| 
 | ||||
| ## Model tampi | ||||
| 
 | ||||
| List of events for the model *tampi* with identifier **`T`** at version `1.0.0`: | ||||
|  | ||||
							
								
								
									
										164
									
								
								doc/user/emulation/openmp.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								doc/user/emulation/openmp.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,164 @@ | ||||
| # OpenMP Model | ||||
| 
 | ||||
| The LLVM OpenMP Runtime is an integral component of the LLVM compiler | ||||
| infrastructure that provides support for the OpenMP (Open Multi-Processing) | ||||
| programming model. | ||||
| 
 | ||||
| OpenMP is a widely used API and set of directives for parallel programming, | ||||
| allowing developers to write multi-threaded and multi-process applications more | ||||
| easily. | ||||
| 
 | ||||
| This documentation is about an OpenMP runtime built on top of [nOS-V][nosv], | ||||
| leveraging its thread management capabilities while retaining the fundamental | ||||
| characteristics of the original runtime. | ||||
| 
 | ||||
| While the modifications introduced to the runtime may appear to be minor, it's | ||||
| important to note that this enhanced version is not API compatible with the | ||||
| original runtime. As a result, it is mandatory to use the clang built in the same | ||||
| [LLVM Project][llvm]. | ||||
| 
 | ||||
| This document describes all the instrumentation features included in the runtime | ||||
| by both nOS-V and OpenMP to monitor task execution and the execution flow within | ||||
| the runtime library to identify what is happening. This data is useful for both | ||||
| users and developers of the OpenMP runtime to analyze issues and undesired | ||||
| behaviors. | ||||
| 
 | ||||
| [llvm]: https://pm.bsc.es/llvm-ompss | ||||
| [nosv]: https://gitlab.bsc.es/nos-v/nos-v | ||||
| 
 | ||||
| ## How to Generate Execution Traces | ||||
| 
 | ||||
| In order to build the OpenMP runtime, nOS-V must be provided through the | ||||
| `PKG_CONFIG_PATH` environment variable when configuring CMake. This results in a | ||||
| runtime without instrumentation. However, the user may be able to generate | ||||
| execution traces by enabling nOS-V instrumentation through | ||||
| `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. Note that this needs a | ||||
| nOS-V installation built with ovni. | ||||
| 
 | ||||
| Building OpenMP with instrumentation requires adding the ovni pkg-config path to | ||||
| `PKG_CONFIG_PATH`, together with a nOS-V installation that is also compiled with | ||||
| ovni. This is because OpenMP depends on nOS-V to generate complete execution traces. | ||||
| 
 | ||||
| By default, OpenMP will not instrument anything. To enable instrumentation the | ||||
| user must execute with `OMP_OVNI=1` and `NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`. | ||||
| 
 | ||||
| The following sections will describe the OpenMP execution trace views and what | ||||
| information is shown there. | ||||
| 
 | ||||
| ## nOS-V Task Type | ||||
| 
 | ||||
| As stated in the previous sections, this OpenMP runtime is built on top of nOS-V, | ||||
| so the user can also explore what the execution does at the nOS-V level. Here we only | ||||
| describe the task type view. For other views please take a look at the nOS-V chapter. | ||||
| 
 | ||||
| In OpenMP, every thread that is launched (main thread included) is shown in a task | ||||
| type with label "openmp". In a task application, every task call will be seen with | ||||
| a task type with label "file:line:col" format referring to the pragma location. This | ||||
| can be changed by using the clause label(string-literal). | ||||
| 
 | ||||
| OpenMP task if0 will not be shown here. Take a look at the section "Limitations" for | ||||
| more information. Nevertheless, the OpenMP task view shows it. | ||||
| 
 | ||||
| ## OpenMP Subsystem | ||||
| 
 | ||||
| This view illustrates the activities of each thread with different states: | ||||
| 
 | ||||
| - **Attached**: The thread is attached. | ||||
| 
 | ||||
| - **Join barrier**: The thread is in the implicit barrier of the parallel region. | ||||
| 
 | ||||
| - **Barrier**: The thread is in a barrier (events `PBb` and `PBB`). | ||||
| 
 | ||||
| - **Tasking barrier**: The thread is in the additional tasking barrier trying to | ||||
|   execute tasks. This event happens if executed with KMP_TASKING=1. | ||||
| 
 | ||||
| - **Spin wait**: The thread spin waits for a condition. Usually this event happens | ||||
|   in a barrier while waiting for the other threads to reach the barrier. The thread | ||||
|   also tries to execute tasks. | ||||
| 
 | ||||
| - **For static**: Executing a for static. The length of the event represents all the | ||||
|   chunks of iterations executed by the thread. See "Limitations" section. | ||||
| 
 | ||||
| - **For dynamic init**: Running the initialization of an OpenMP for dynamic. | ||||
| 
 | ||||
| - **For dynamic chunk**: Running a chunk of iterations of an OpenMP for dynamic. To | ||||
|   clarify: if a thread executes two chunks of iterations, let's say from 1 to 4 and | ||||
|   from 8 to 12, two different events will be shown. See "Limitations" section. | ||||
| 
 | ||||
| - **Single**: Running a Single region. All threads of the parallel region will emit | ||||
|   the event. | ||||
| 
 | ||||
| - **Release deps**: When finishing a task, trying to release dependencies. This | ||||
|   event happens even if the task has no dependencies. | ||||
| 
 | ||||
| - **Taskwait deps**: Trying to execute tasks until dependencies have been fulfilled. | ||||
|   This appears typically in a task if0 with dependencies or a taskwait with deps. | ||||
| 
 | ||||
| - **Invoke task**: Executing a task. | ||||
| 
 | ||||
| - **Invoke task if0**: Executing a task if0. | ||||
| 
 | ||||
| - **Task alloc**: Allocating the task descriptor. | ||||
| 
 | ||||
| - **Task schedule**: Adding the task to the scheduler. | ||||
| 
 | ||||
| - **Taskwait**: Running a taskwait. | ||||
| 
 | ||||
| - **Taskyield**: Running a taskyield. | ||||
| 
 | ||||
| - **Task dup alloc**: Duplicating the task descriptor in a taskloop. | ||||
| 
 | ||||
| - **Check deps**: Checking if the task has pending dependencies to be fulfilled. This | ||||
|   means that if all dependencies are fulfilled the task will be scheduled. | ||||
| 
 | ||||
| - **Taskgroup**: Running a taskgroup. | ||||
| 
 | ||||
| ## Limitations | ||||
| 
 | ||||
| Because of the way OpenMP is implemented, there are some instrumentation points that | ||||
| violate ovni subsystem rules. This mostly happens because some directives are lowered | ||||
| partially in the transformed user code, so it is not easy to wrap them into a | ||||
| Single-entry single-exit (SESE) region, like we would do with a regular task invocation, | ||||
| for example. | ||||
| 
 | ||||
| All problematic directives are described here so the user is able to understand what | ||||
| is being shown in the traces: | ||||
| 
 | ||||
| - **Task if0**: The lowered user code of a task if0 is: | ||||
|   ... = __kmpc_omp_task_alloc(...); | ||||
|   __kmpc_omp_taskwait_deps_51(...); // If task has dependencies | ||||
|   __kmpc_omp_task_begin_if0(...); | ||||
|   // Call to the user code | ||||
|   omp_task_entry_(...); | ||||
|   __kmpc_omp_task_complete_if0(...); | ||||
| 
 | ||||
|   Ideally, `omp_task_entry` should be called by the runtime to ensure the SESE structure. As | ||||
|   this code is generated by the compiler it is assumed that instrumenting `__kmpc_omp_task_begin_if0` | ||||
|   and `__kmpc_omp_task_complete_if0` as entry/exit points is safe and equivalent. | ||||
| 
 | ||||
| - **For static**: The lowered user code of a for static is: | ||||
|   // Parallel code | ||||
|   __kmpc_for_static_init_4(...); | ||||
|   for ( i = ...; i <= ...; ++i ) | ||||
|     ;  | ||||
|   __kmpc_for_static_fini(...); | ||||
| 
 | ||||
|   Ideally, the for loop should be called by the runtime to ensure the SESE structure. As | ||||
|   this code is generated by the compiler it is assumed that instrumenting `__kmpc_for_static_init_4` | ||||
|   and `__kmpc_for_static_fini` as entry/exit points is safe and equivalent. | ||||
| 
 | ||||
| - **For dynamic**: The lowered user code of a for dynamic is: | ||||
| 
 | ||||
|   __kmpc_dispatch_init_4(...); | ||||
|   while ( __kmpc_dispatch_next_4(...)) | ||||
|   { | ||||
|     for ( i = ...; i <= ...; ++i ) | ||||
|       ; | ||||
|   } | ||||
| 
 | ||||
|   Ideally, the for loop should be called by the runtime to ensure the SESE structure. As | ||||
|   this code is generated by the compiler the subsystem view shows: | ||||
|   1. How long it takes to run `__kmpc_dispatch_init_4` with the event **For dynamic init**. | ||||
|   2. How long it takes to run from the end of 1. to the first `__kmpc_dispatch_next_4` | ||||
|   with the event **For dynamic chunk**. | ||||
|   3. How long it takes to run a loop iteration chunk between the last and the previous | ||||
|   `__kmpc_dispatch_next_4` call with the event **For dynamic chunk**. | ||||
| 
 | ||||
| @ -61,6 +61,8 @@ add_library(emu STATIC | ||||
|   tampi/event.c | ||||
|   kernel/setup.c | ||||
|   kernel/event.c | ||||
|   openmp/setup.c | ||||
|   openmp/event.c | ||||
| ) | ||||
| target_link_libraries(emu ovni-static) | ||||
| 
 | ||||
|  | ||||
| @ -29,6 +29,7 @@ enum emu_prv_types { | ||||
| 	PRV_NANOS6_IDLE      = 40, | ||||
| 	PRV_NANOS6_BREAKDOWN = 41, | ||||
| 	PRV_KERNEL_CS        = 45, | ||||
| 	PRV_OPENMP_SUBSYSTEM = 50, | ||||
| 	PRV_RESERVED         = 100, | ||||
| }; | ||||
| 
 | ||||
|  | ||||
| @ -15,6 +15,7 @@ extern struct model_spec model_nodes; | ||||
| extern struct model_spec model_tampi; | ||||
| extern struct model_spec model_mpi; | ||||
| extern struct model_spec model_kernel; | ||||
| extern struct model_spec model_openmp; | ||||
| 
 | ||||
| static struct model_spec *models[] = { | ||||
| 	&model_ovni, | ||||
| @ -24,6 +25,7 @@ static struct model_spec *models[] = { | ||||
| 	&model_tampi, | ||||
| 	&model_mpi, | ||||
| 	&model_kernel, | ||||
| 	&model_openmp, | ||||
| 	NULL | ||||
| }; | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										111
									
								
								src/emu/openmp/event.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								src/emu/openmp/event.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,111 @@ | ||||
| /* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 | ||||
|  * SPDX-License-Identifier: GPL-3.0-or-later */ | ||||
| 
 | ||||
| #include "openmp_priv.h" | ||||
| #include "chan.h" | ||||
| #include "common.h" | ||||
| #include "emu.h" | ||||
| #include "emu_ev.h" | ||||
| #include "extend.h" | ||||
| #include "model_thread.h" | ||||
| #include "thread.h" | ||||
| #include "value.h" | ||||
| 
 | ||||
| enum { PUSH = 1, POP = 2, IGN = 3 }; | ||||
| 
 | ||||
| static const int fn_table[256][256][3] = { | ||||
| 	['A'] = { | ||||
| 		['['] = { CH_SUBSYSTEM, PUSH, ST_ATTACHED }, | ||||
| 		[']'] = { CH_SUBSYSTEM, POP, ST_ATTACHED }, | ||||
| 	}, | ||||
| 	['B'] = { | ||||
| 		['j'] = { CH_SUBSYSTEM, PUSH, ST_JOIN_BARRIER }, | ||||
| 		['J'] = { CH_SUBSYSTEM, POP, ST_JOIN_BARRIER }, | ||||
| 		['b'] = { CH_SUBSYSTEM, PUSH, ST_BARRIER }, | ||||
| 		['B'] = { CH_SUBSYSTEM, POP, ST_BARRIER }, | ||||
| 		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKING_BARRIER }, | ||||
| 		['T'] = { CH_SUBSYSTEM, POP, ST_TASKING_BARRIER }, | ||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_SPIN_WAIT }, | ||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_SPIN_WAIT }, | ||||
| 	}, | ||||
| 	['W'] = { | ||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_FOR_STATIC }, | ||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_FOR_STATIC }, | ||||
| 		['d'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_INIT }, | ||||
| 		['D'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_INIT }, | ||||
| 		['c'] = { CH_SUBSYSTEM, PUSH, ST_FOR_DYNAMIC_CHUNK }, | ||||
| 		['C'] = { CH_SUBSYSTEM, POP, ST_FOR_DYNAMIC_CHUNK }, | ||||
| 		['i'] = { CH_SUBSYSTEM, PUSH, ST_SINGLE }, | ||||
| 		['I'] = { CH_SUBSYSTEM, POP, ST_SINGLE }, | ||||
| 	}, | ||||
| 	['T'] = { | ||||
| 		['r'] = { CH_SUBSYSTEM, PUSH, ST_RELEASE_DEPS }, | ||||
| 		['R'] = { CH_SUBSYSTEM, POP, ST_RELEASE_DEPS }, | ||||
| 		['w'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT_DEPS }, | ||||
| 		['W'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT_DEPS }, | ||||
| 		['['] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK }, | ||||
| 		[']'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK }, | ||||
| 		['i'] = { CH_SUBSYSTEM, PUSH, ST_INVOKE_TASK_IF0 }, | ||||
| 		['I'] = { CH_SUBSYSTEM, POP, ST_INVOKE_TASK_IF0 }, | ||||
| 		['a'] = { CH_SUBSYSTEM, PUSH, ST_TASK_ALLOC }, | ||||
| 		['A'] = { CH_SUBSYSTEM, POP, ST_TASK_ALLOC }, | ||||
| 		['s'] = { CH_SUBSYSTEM, PUSH, ST_TASK_SCHEDULE }, | ||||
| 		['S'] = { CH_SUBSYSTEM, POP, ST_TASK_SCHEDULE }, | ||||
| 		['t'] = { CH_SUBSYSTEM, PUSH, ST_TASKWAIT }, | ||||
| 		['T'] = { CH_SUBSYSTEM, POP, ST_TASKWAIT }, | ||||
| 		['y'] = { CH_SUBSYSTEM, PUSH, ST_TASKYIELD }, | ||||
| 		['Y'] = { CH_SUBSYSTEM, POP, ST_TASKYIELD }, | ||||
| 		['d'] = { CH_SUBSYSTEM, PUSH, ST_TASK_DUP_ALLOC }, | ||||
| 		['D'] = { CH_SUBSYSTEM, POP, ST_TASK_DUP_ALLOC }, | ||||
| 		['c'] = { CH_SUBSYSTEM, PUSH, ST_CHECK_DEPS }, | ||||
| 		['C'] = { CH_SUBSYSTEM, POP, ST_CHECK_DEPS }, | ||||
| 		['g'] = { CH_SUBSYSTEM, PUSH, ST_TASKGROUP }, | ||||
| 		['G'] = { CH_SUBSYSTEM, POP, ST_TASKGROUP }, | ||||
| 	}, | ||||
| }; | ||||
| 
 | ||||
| static int | ||||
| process_ev(struct emu *emu) | ||||
| { | ||||
| 	if (!emu->thread->is_running) { | ||||
| 		err("current thread %d not running", emu->thread->tid); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	const int *entry = fn_table[emu->ev->c][emu->ev->v]; | ||||
| 	int chind = entry[0]; | ||||
| 	int action = entry[1]; | ||||
| 	int st = entry[2]; | ||||
| 
 | ||||
| 	struct openmp_thread *th = EXT(emu->thread, 'P'); | ||||
| 	struct chan *ch = &th->m.ch[chind]; | ||||
| 
 | ||||
| 	if (action == PUSH) { | ||||
| 		return chan_push(ch, value_int64(st)); | ||||
| 	} else if (action == POP) { | ||||
| 		return chan_pop(ch, value_int64(st)); | ||||
| 	} else if (action == IGN) { | ||||
| 		return 0; /* do nothing */ | ||||
| 	} | ||||
| 
 | ||||
| 	err("unknown openmp function event"); | ||||
| 	return -1; | ||||
| } | ||||
| 
 | ||||
| int | ||||
| model_openmp_event(struct emu *emu) | ||||
| { | ||||
| 	dbg("in openmp_event"); | ||||
| 	if (emu->ev->m != 'P') { | ||||
| 		err("unexpected event model %c", emu->ev->m); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	dbg("got openmp event %s", emu->ev->mcv); | ||||
| 	if (process_ev(emu) != 0) { | ||||
| 		err("error processing openmp event"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
							
								
								
									
										56
									
								
								src/emu/openmp/openmp_priv.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								src/emu/openmp/openmp_priv.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,56 @@ | ||||
| /* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 | ||||
|  * SPDX-License-Identifier: GPL-3.0-or-later */ | ||||
| 
 | ||||
| #ifndef OPENMP_PRIV_H | ||||
| #define OPENMP_PRIV_H | ||||
| 
 | ||||
| #include "emu.h" | ||||
| #include "model_cpu.h" | ||||
| #include "model_thread.h" | ||||
| 
 | ||||
| /* Private enums */ | ||||
| 
 | ||||
| enum openmp_chan { | ||||
| 	CH_SUBSYSTEM = 0, | ||||
| 	CH_MAX, | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
/* States of the OpenMP subsystem channel. The numbering starts at 1 and
 * is spelled out explicitly so that the value of each state is visible
 * at a glance. */
enum openmp_function_values {
	ST_ATTACHED          = 1,
	ST_JOIN_BARRIER      = 2,
	ST_BARRIER           = 3,
	ST_TASKING_BARRIER   = 4,
	ST_SPIN_WAIT         = 5,
	ST_FOR_STATIC        = 6,
	ST_FOR_DYNAMIC_INIT  = 7,
	ST_FOR_DYNAMIC_CHUNK = 8,
	ST_SINGLE            = 9,
	ST_RELEASE_DEPS      = 10,
	ST_TASKWAIT_DEPS     = 11,
	ST_INVOKE_TASK       = 12,
	ST_INVOKE_TASK_IF0   = 13,
	ST_TASK_ALLOC        = 14,
	ST_TASK_SCHEDULE     = 15,
	ST_TASKWAIT          = 16,
	ST_TASKYIELD         = 17,
	ST_TASK_DUP_ALLOC    = 18,
	ST_CHECK_DEPS        = 19,
	ST_TASKGROUP         = 20,
};
| 
 | ||||
| struct openmp_thread { | ||||
| 	struct model_thread m; | ||||
| }; | ||||
| 
 | ||||
| struct openmp_cpu { | ||||
| 	struct model_cpu m; | ||||
| }; | ||||
| 
 | ||||
| int model_openmp_probe(struct emu *emu); | ||||
| int model_openmp_create(struct emu *emu); | ||||
| int model_openmp_connect(struct emu *emu); | ||||
| int model_openmp_event(struct emu *emu); | ||||
| int model_openmp_finish(struct emu *emu); | ||||
| 
 | ||||
| #endif /* OPENMP_PRIV_H */ | ||||
							
								
								
									
										251
									
								
								src/emu/openmp/setup.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										251
									
								
								src/emu/openmp/setup.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,251 @@ | ||||
| /* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
 | ||||
|  * SPDX-License-Identifier: GPL-3.0-or-later */ | ||||
| 
 | ||||
| #include "openmp_priv.h" | ||||
| #include <stddef.h> | ||||
| #include "chan.h" | ||||
| #include "common.h" | ||||
| #include "emu.h" | ||||
| #include "emu_args.h" | ||||
| #include "emu_prv.h" | ||||
| #include "ev_spec.h" | ||||
| #include "extend.h" | ||||
| #include "model.h" | ||||
| #include "model_chan.h" | ||||
| #include "model_cpu.h" | ||||
| #include "model_pvt.h" | ||||
| #include "model_thread.h" | ||||
| #include "pv/pcf.h" | ||||
| #include "pv/prv.h" | ||||
| #include "system.h" | ||||
| #include "thread.h" | ||||
| #include "track.h" | ||||
| #include "value.h" | ||||
| 
 | ||||
| static const char model_name[] = "openmp"; | ||||
| enum { model_id = 'P' }; | ||||
| 
 | ||||
| static struct ev_decl model_evlist[] = { | ||||
| 	PAIR_E("PA[", "PA]", "the attached state") | ||||
| 
 | ||||
| 	PAIR_E("PBj", "PBJ", "a join barrier") | ||||
| 	PAIR_E("PBb", "PBB", "a barrier") | ||||
| 	PAIR_E("PBt", "PBT", "a tasking barrier") | ||||
| 	PAIR_E("PBs", "PBS", "a spin wait") | ||||
| 
 | ||||
| 	PAIR_B("PWs", "PWS", "static for") | ||||
| 	PAIR_B("PWd", "PWD", "dynamic for init") | ||||
| 	PAIR_B("PWc", "PWC", "dynamic for chunk") | ||||
| 	PAIR_B("PWi", "PWI", "single") | ||||
| 
 | ||||
| 	PAIR_B("PTr", "PTR", "releasing task dependencies") | ||||
| 	PAIR_B("PTw", "PTW", "waiting for taskwait dependencies") | ||||
| 	PAIR_B("PT[", "PT]", "invoking a task") | ||||
| 	PAIR_B("PTi", "PTI", "invoking an if0 task") | ||||
| 	PAIR_B("PTa", "PTA", "task allocation") | ||||
| 	PAIR_B("PTs", "PTS", "scheduling a task") | ||||
| 	PAIR_E("PTt", "PTT", "a taskwait") | ||||
| 	PAIR_E("PTy", "PTY", "a taskyield") | ||||
| 	PAIR_B("PTd", "PTD", "duplicating a task") | ||||
| 	PAIR_B("PTc", "PTC", "checking task dependencies") | ||||
| 	PAIR_E("PTg", "PTG", "a taskgroup") | ||||
| 
 | ||||
| 	{ NULL, NULL }, | ||||
| }; | ||||
| 
 | ||||
| struct model_spec model_openmp = { | ||||
| 	.name = model_name, | ||||
| 	.version = "1.1.0", | ||||
| 	.evlist  = model_evlist, | ||||
| 	.model = model_id, | ||||
| 	.create  = model_openmp_create, | ||||
| 	.connect = model_openmp_connect, | ||||
| 	.event   = model_openmp_event, | ||||
| 	.probe   = model_openmp_probe, | ||||
| 	.finish  = model_openmp_finish, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------- channels ------------------ */ | ||||
| 
 | ||||
| static const char *chan_name[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = "subsystem", | ||||
| }; | ||||
| 
 | ||||
| static const int chan_stack[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = 1, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------- pvt ------------------ */ | ||||
| 
 | ||||
| static const int pvt_type[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = PRV_OPENMP_SUBSYSTEM, | ||||
| }; | ||||
| 
 | ||||
| static const char *pcf_prefix[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = "OpenMP subsystem", | ||||
| }; | ||||
| 
 | ||||
| static const struct pcf_value_label openmp_subsystem_values[] = { | ||||
| 	{ ST_ATTACHED,          "Attached" }, | ||||
| 	{ ST_JOIN_BARRIER,      "Join barrier" }, | ||||
| 	{ ST_BARRIER,           "Barrier" }, | ||||
| 	{ ST_TASKING_BARRIER,   "Tasking barrier" }, | ||||
| 	{ ST_SPIN_WAIT,         "Spin wait" }, | ||||
| 	{ ST_FOR_STATIC,        "For static" }, | ||||
| 	{ ST_FOR_DYNAMIC_INIT,  "For dynamic init" }, | ||||
| 	{ ST_FOR_DYNAMIC_CHUNK, "For dynamic chunk" }, | ||||
| 	{ ST_SINGLE,            "Single" }, | ||||
| 	{ ST_RELEASE_DEPS,      "Release deps" }, | ||||
| 	{ ST_TASKWAIT_DEPS,     "Taskwait deps" }, | ||||
| 	{ ST_INVOKE_TASK,       "Invoke task" }, | ||||
| 	{ ST_INVOKE_TASK_IF0,   "Invoke task if0" }, | ||||
| 	{ ST_TASK_ALLOC,        "Task alloc" }, | ||||
| 	{ ST_TASK_SCHEDULE,     "Task schedule" }, | ||||
| 	{ ST_TASKWAIT,          "Taskwait" }, | ||||
| 	{ ST_TASKYIELD,         "Taskyield" }, | ||||
| 	{ ST_TASK_DUP_ALLOC,    "Task dup alloc" }, | ||||
| 	{ ST_CHECK_DEPS,        "Check deps" }, | ||||
| 	{ ST_TASKGROUP,         "Taskgroup" }, | ||||
| 	{ -1, NULL }, | ||||
| }; | ||||
| 
 | ||||
| static const struct pcf_value_label *pcf_labels[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = openmp_subsystem_values, | ||||
| }; | ||||
| 
 | ||||
| static const long prv_flags[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = PRV_SKIPDUP, | ||||
| }; | ||||
| 
 | ||||
| static const struct model_pvt_spec pvt_spec = { | ||||
| 	.type = pvt_type, | ||||
| 	.prefix = pcf_prefix, | ||||
| 	.label = pcf_labels, | ||||
| 	.flags = prv_flags, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------- tracking ------------------ */ | ||||
| 
 | ||||
| static const int th_track[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = TRACK_TH_RUN, | ||||
| }; | ||||
| 
 | ||||
| static const int cpu_track[CH_MAX] = { | ||||
| 	[CH_SUBSYSTEM] = TRACK_TH_RUN, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------- chan_spec ------------------ */ | ||||
| 
 | ||||
| static const struct model_chan_spec th_chan = { | ||||
| 	.nch = CH_MAX, | ||||
| 	.prefix = model_name, | ||||
| 	.ch_names = chan_name, | ||||
| 	.ch_stack = chan_stack, | ||||
| 	.pvt = &pvt_spec, | ||||
| 	.track = th_track, | ||||
| }; | ||||
| 
 | ||||
| static const struct model_chan_spec cpu_chan = { | ||||
| 	.nch = CH_MAX, | ||||
| 	.prefix = model_name, | ||||
| 	.ch_names = chan_name, | ||||
| 	.ch_stack = chan_stack, | ||||
| 	.pvt = &pvt_spec, | ||||
| 	.track = cpu_track, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------- models ------------------ */ | ||||
| 
 | ||||
| static const struct model_cpu_spec cpu_spec = { | ||||
| 	.size = sizeof(struct openmp_cpu), | ||||
| 	.chan = &cpu_chan, | ||||
| 	.model = &model_openmp, | ||||
| }; | ||||
| 
 | ||||
| static const struct model_thread_spec th_spec = { | ||||
| 	.size = sizeof(struct openmp_thread), | ||||
| 	.chan = &th_chan, | ||||
| 	.model = &model_openmp, | ||||
| }; | ||||
| 
 | ||||
| /* ----------------------------------------------------- */ | ||||
| 
 | ||||
| int | ||||
| model_openmp_probe(struct emu *emu) | ||||
| { | ||||
| 	return model_version_probe(&model_openmp, emu); | ||||
| } | ||||
| 
 | ||||
| int | ||||
| model_openmp_create(struct emu *emu) | ||||
| { | ||||
| 	if (model_thread_create(emu, &th_spec) != 0) { | ||||
| 		err("model_thread_init failed"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if (model_cpu_create(emu, &cpu_spec) != 0) { | ||||
| 		err("model_cpu_init failed"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int | ||||
| model_openmp_connect(struct emu *emu) | ||||
| { | ||||
| 	if (model_thread_connect(emu, &th_spec) != 0) { | ||||
| 		err("model_thread_connect failed"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if (model_cpu_connect(emu, &cpu_spec) != 0) { | ||||
| 		err("model_cpu_connect failed"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| end_lint(struct emu *emu) | ||||
| { | ||||
| 	/* Only run the check if we finished the complete trace */ | ||||
| 	if (!emu->finished) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	struct system *sys = &emu->system; | ||||
| 
 | ||||
| 	/* Ensure we run out of function states */ | ||||
| 	for (struct thread *t = sys->threads; t; t = t->gnext) { | ||||
| 		struct openmp_thread *th = EXT(t, model_id); | ||||
| 		struct chan *ch = &th->m.ch[CH_SUBSYSTEM]; | ||||
| 		int stacked = ch->data.stack.n; | ||||
| 		if (stacked > 0) { | ||||
| 			struct value top; | ||||
| 			if (chan_read(ch, &top) != 0) { | ||||
| 				err("chan_read failed for function"); | ||||
| 				return -1; | ||||
| 			} | ||||
| 
 | ||||
| 			err("thread %d ended with %d stacked openmp functions", | ||||
| 					t->tid, stacked); | ||||
| 			return -1; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int | ||||
| model_openmp_finish(struct emu *emu) | ||||
| { | ||||
| 	/* When running in linter mode perform additional checks */ | ||||
| 	if (emu->args.linter_mode && end_lint(emu) != 0) { | ||||
| 		err("end_lint failed"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Raúl Peñacoba Veigas
						Raúl Peñacoba Veigas