Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 148aaa71a1 | |||
| cdc5b9b866 | |||
| 5e502b67d2 | |||
| 6aba89a8a8 | |||
| 9e6f691325 | |||
| 7ce892a9ff | |||
| 4c58f4619b | |||
| 8f4aa59148 | |||
| d115ecad64 | |||
| 73ce6ed035 | |||
| bba46ac200 | |||
| 79d17b5b0c | |||
| 7522886d3f | |||
| 16dbc8bf5d | |||
| f6fc166a38 | |||
| ceaac3bcf0 | |||
| d83e8f2d68 | |||
| f6d85e9af5 | |||
| 1f30e8ef8b | |||
| 24805f607b | |||
| 94ede68bab | |||
| 8d59161a62 | |||
| 4e3406d5f8 | |||
| 2bf739efed | |||
| 6a595fff0b | |||
| 6a54f19b76 | |||
| 3f6ec86890 | |||
| 8e9cc34e5e | |||
| 1b95fa813b | |||
| aafaf6e954 | |||
| 4ec966cb67 | |||
| 3751f3ac64 | |||
| f31e73003f | |||
| add2c5638a | |||
| eceec52194 | |||
| 492d6fa290 |
39
CHANGELOG.md
39
CHANGELOG.md
@ -7,46 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Add support for hardware counters (HWC) in nOS-V.
|
||||
|
||||
### Changed
|
||||
|
||||
- Increase nOS-V model version to 2.6.0.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix a bug in ovniemu when loading loom CPUs from multiple threads.
|
||||
|
||||
## [1.12.0] - 2025-05-08
|
||||
|
||||
### Changed
|
||||
|
||||
- OpenMP model version increased to 1.2.1.
|
||||
- nOS-V model version increased to 2.5.1.
|
||||
|
||||
### Added
|
||||
|
||||
- Add support for OpenMP label and task ID views.
|
||||
- Add support for nOS-V non-blocking scheduler server events (`VSN` and `VSn`).
|
||||
- Add OpenMP simple breakdown view.
|
||||
- Add bench6 package to run full mini-apps for tests.
|
||||
|
||||
## [1.11.0] - 2024-11-08
|
||||
|
||||
### Added
|
||||
|
||||
- Introduce part model.
|
||||
- Support for `nosv_cond_wait`, `nosv_cond_signal` and `nosv_cond_broadcast` events VA{oOgGkK}.
|
||||
|
||||
### Changed
|
||||
|
||||
- Enable -Wconversion and -Wsign-conversion.
|
||||
- Update trace format to version 3.
|
||||
- The ovni.require metadata key is now mandatory.
|
||||
- Store process metadata in thread metadata.
|
||||
- nOS-V model version increased to 2.4.0.
|
||||
|
||||
### Fixed
|
||||
|
||||
@ -311,8 +274,6 @@ are used along with some other changes.
|
||||
- First ovni release.
|
||||
|
||||
[unreleased]: https://jungle.bsc.es/git/rarias/ovni
|
||||
[1.12.0]: https://github.com/rodarima/ovni/releases/tag/1.12.0
|
||||
[1.11.0]: https://github.com/rodarima/ovni/releases/tag/1.11.0
|
||||
[1.10.0]: https://github.com/rodarima/ovni/releases/tag/1.10.0
|
||||
[1.9.1]: https://github.com/rodarima/ovni/releases/tag/1.9.1
|
||||
[1.9.0]: https://github.com/rodarima/ovni/releases/tag/1.9.0
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(OVNI LANGUAGES C VERSION 1.12.0)
|
||||
project(OVNI LANGUAGES C VERSION 1.10.0)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
@ -79,7 +79,7 @@ if(IWYU)
|
||||
set(CMAKE_C_INCLUDE_WHAT_YOU_USE ${IWYU_CMD})
|
||||
message(STATUS "IWYU found")
|
||||
else()
|
||||
message(STATUS "IWYU not found, skipping")
|
||||
message(WARNING "IWYU not found, skipping")
|
||||
endif()
|
||||
|
||||
# Required for clock_gettime() in glibc <= 2.17
|
||||
@ -96,14 +96,6 @@ if (NOT HAVE_CLOCK_GETTIME)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Check packages and features once
|
||||
find_package(Nanos6)
|
||||
find_package(Nodes)
|
||||
find_package(Nosv)
|
||||
find_package(Libompv)
|
||||
include(CheckPerfParanoid)
|
||||
include(CheckOmpSs2Compiler)
|
||||
|
||||
add_subdirectory(include)
|
||||
add_subdirectory(src)
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_custom_color_enabled true
|
||||
window_custom_color_palette {6.000000000000:94,0,0},{7.000000000000:153,114,0},{9.000000000000:124,213,228},{10.000000000000:242,239,141},{11.000000000000:0,70,0},{19.000000000000:195,96,151},{20.000000000000:255,162,255},{21.000000000000:203,255,3},{22.000000000000:7,255,12},{23.000000000000:21,165,118},{24.000000000000:255,103,0},{25.000000000000:200,30,5},{26.000000000000:255,10,200},{27.000000000000:98,133,80},{28.000000000000:0,99,162},{29.000000000000:110,77,252}
|
||||
window_custom_color_palette {6.000000000000:94,0,0},{7.000000000000:153,114,0},{9.000000000000:124,213,228},{10.000000000000:242,239,141},{11.000000000000:0,70,0},{19.000000000000:195,96,151},{20.000000000000:255,162,255},{21.000000000000:203,255,3},{22.000000000000:7,255,12},{23.000000000000:21,224,189},{24.000000000000:255,103,0},{25.000000000000:0,99,162},{26.000000000000:110,77,252}
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW CPU: OpenMP Runtime/Label breakdown >
|
||||
################################################################################
|
||||
window_name CPU: OpenMP Runtime/Label breakdown
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 100
|
||||
window_position_y 100
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled false
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_custom_color_enabled true
|
||||
window_custom_color_palette {1.000000000000:255,177,245},{2.000000000000:255,86,239},{3.000000000000:122,44,22},{5.000000000000:239,188,0},{6.000000000000:160,89,0},{8.000000000000:0,255,73},{10.000000000000:86,209,43},{11.000000000000:203,208,93},{12.000000000000:0,176,169},{13.000000000000:190,82,201},{14.000000000000:124,114,183},{15.000000000000:157,231,255},{16.000000000000:199,194,0},{17.000000000000:96,0,200},{18.000000000000:255,255,124},{19.000000000000:35,152,0},{21.000000000000:255,251,174},{22.000000000000:232,0,0},{23.000000000000:210,66,40},{26.000000000000:101,101,99},{27.000000000000:200,0,255},{28.000000000000:0,203,249},{30.000000000000:255,219,0},{31.000000000000:48,103,107},{34.000000000000:194,105,126} {100.000000000000:0,100,0},{101.000000000000:162,155,60},{102.000000000000:124,0,0}
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 53
|
||||
window_filter_module evt_type_label 1 "CPU: OpenMP Runtime/Label breakdown"
|
||||
|
||||
@ -1,41 +0,0 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW CPU: OpenMP label of the RUNNING thread >
|
||||
################################################################################
|
||||
window_name CPU: OpenMP label of the RUNNING thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 100
|
||||
window_position_y 100
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 51
|
||||
window_filter_module evt_type_label 1 "CPU: OpenMP label of the RUNNING thread"
|
||||
@ -1,41 +0,0 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW CPU: OpenMP task id of the RUNNING thread >
|
||||
################################################################################
|
||||
window_name CPU: OpenMP task id of the RUNNING thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 100
|
||||
window_position_y 100
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 52
|
||||
window_filter_module evt_type_label 1 "CPU: OpenMP task id of the RUNNING thread"
|
||||
@ -17,7 +17,7 @@ window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_custom_color_enabled true
|
||||
window_custom_color_palette {6.000000000000:94,0,0},{7.000000000000:153,114,0},{9.000000000000:124,213,228},{10.000000000000:242,239,141},{11.000000000000:0,70,0},{19.000000000000:195,96,151},{20.000000000000:255,162,255},{21.000000000000:203,255,3},{22.000000000000:7,255,12},{23.000000000000:21,165,118},{24.000000000000:255,103,0},{25.000000000000:200,30,5},{26.000000000000:255,10,200},{27.000000000000:98,133,80},{28.000000000000:0,99,162},{29.000000000000:110,77,252}
|
||||
window_custom_color_palette {6.000000000000:94,0,0},{7.000000000000:153,114,0},{9.000000000000:124,213,228},{10.000000000000:242,239,141},{11.000000000000:0,70,0},{19.000000000000:195,96,151},{20.000000000000:255,162,255},{21.000000000000:203,255,3},{22.000000000000:7,255,12},{23.000000000000:21,224,189},{24.000000000000:255,103,0},{25.000000000000:0,99,162},{26.000000000000:110,77,252}
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
|
||||
@ -1,41 +0,0 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW Thread: OpenMP label of the ACTIVE thread >
|
||||
################################################################################
|
||||
window_name Thread: OpenMP label of the ACTIVE thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 100
|
||||
window_position_y 100
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 51
|
||||
window_filter_module evt_type_label 1 "Thread: OpenMP label of the ACTIVE thread"
|
||||
@ -1,41 +0,0 @@
|
||||
#ParaverCFG
|
||||
ConfigFile.Version: 3.4
|
||||
ConfigFile.NumWindows: 1
|
||||
|
||||
|
||||
################################################################################
|
||||
< NEW DISPLAYING WINDOW Thread: OpenMP task id of the ACTIVE thread >
|
||||
################################################################################
|
||||
window_name Thread: OpenMP task id of the ACTIVE thread
|
||||
window_type single
|
||||
window_id 1
|
||||
window_position_x 100
|
||||
window_position_y 100
|
||||
window_width 600
|
||||
window_height 150
|
||||
window_comm_lines_enabled true
|
||||
window_flags_enabled false
|
||||
window_noncolor_mode true
|
||||
window_logical_filtered true
|
||||
window_physical_filtered false
|
||||
window_comm_fromto true
|
||||
window_comm_tagsize true
|
||||
window_comm_typeval true
|
||||
window_units Microseconds
|
||||
window_maximum_y 1000.0
|
||||
window_minimum_y 1.0
|
||||
window_compute_y_max true
|
||||
window_level thread
|
||||
window_scale_relative 1.000000000000
|
||||
window_end_time_relative 1.000000000000
|
||||
window_object appl { 1, { All } }
|
||||
window_begin_time_relative 0.000000000000
|
||||
window_open true
|
||||
window_drawmode draw_randnotzero
|
||||
window_drawmode_rows draw_randnotzero
|
||||
window_pixel_size 1
|
||||
window_labels_to_draw 1
|
||||
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
|
||||
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
|
||||
window_filter_module evt_type 1 52
|
||||
window_filter_module evt_type_label 1 "Thread: OpenMP task id of the ACTIVE thread"
|
||||
@ -1,21 +0,0 @@
|
||||
# Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
set(LIBOMPV_FLAG "-fopenmp=libompv")
|
||||
|
||||
include(CheckCCompilerFlag)
|
||||
|
||||
# Add the flag at compile and link time
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS "${LIBOMPV_FLAG}")
|
||||
check_c_compiler_flag("${LIBOMPV_FLAG}" LIBOMPV_FOUND)
|
||||
|
||||
if(NOT LIBOMPV_FOUND)
|
||||
message(STATUS "Compiler doesn't support -fopenmp=libompv")
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(NOT TARGET Libompv)
|
||||
add_library(Libompv INTERFACE)
|
||||
target_compile_options(Libompv INTERFACE "${LIBOMPV_FLAG}")
|
||||
target_link_options(Libompv INTERFACE "${LIBOMPV_FLAG}")
|
||||
endif()
|
||||
@ -1,4 +1,4 @@
|
||||
mkdocs==1.6.0
|
||||
mkdocs==1.4.1
|
||||
markdown==3.3.7
|
||||
python-markdown-math==0.8
|
||||
jinja2==3.1.2
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Emulator events
|
||||
|
||||
This is an exhaustive list of the events recognized by the emulator.
|
||||
Built on Jul 28 2025.
|
||||
Built on Jun 17 2024.
|
||||
|
||||
## Model nanos6
|
||||
|
||||
@ -437,7 +437,7 @@ List of events for the model *ovni* with identifier **`O`** at version `1.1.0`:
|
||||
|
||||
## Model openmp
|
||||
|
||||
List of events for the model *openmp* with identifier **`P`** at version `1.2.1`:
|
||||
List of events for the model *openmp* with identifier **`P`** at version `1.1.0`:
|
||||
<dl>
|
||||
<dt><a id="PBb" href="#PBb"><pre>PBb</pre></a></dt>
|
||||
<dd>begins plain barrier</dd>
|
||||
@ -563,18 +563,6 @@ List of events for the model *openmp* with identifier **`P`** at version `1.2.1`
|
||||
<dd>begins initialization</dd>
|
||||
<dt><a id="PCI" href="#PCI"><pre>PCI</pre></a></dt>
|
||||
<dd>ceases initialization</dd>
|
||||
<dt><a id="POc" href="#POc"><pre>POc+(u32 typeid, str label)</pre></a></dt>
|
||||
<dd>creates a type %{typeid} with label "%{label}"</dd>
|
||||
<dt><a id="PPc" href="#PPc"><pre>PPc(u32 taskid, u32 typeid)</pre></a></dt>
|
||||
<dd>creates the task %{taskid} with type %{typeid}</dd>
|
||||
<dt><a id="PPx" href="#PPx"><pre>PPx(u32 taskid)</pre></a></dt>
|
||||
<dd>executes the task %{taskid}</dd>
|
||||
<dt><a id="PPe" href="#PPe"><pre>PPe(u32 taskid)</pre></a></dt>
|
||||
<dd>ends the task %{taskid}</dd>
|
||||
<dt><a id="PQx" href="#PQx"><pre>PQx(u32 typeid)</pre></a></dt>
|
||||
<dd>begins worksharing with type %{typeid}</dd>
|
||||
<dt><a id="PQe" href="#PQe"><pre>PQe(u32 typeid)</pre></a></dt>
|
||||
<dd>ends worksharing with type %{typeid}</dd>
|
||||
</dl>
|
||||
|
||||
## Model tampi
|
||||
@ -633,7 +621,7 @@ List of events for the model *tampi* with identifier **`T`** at version `1.0.0`:
|
||||
|
||||
## Model nosv
|
||||
|
||||
List of events for the model *nosv* with identifier **`V`** at version `2.6.0`:
|
||||
List of events for the model *nosv* with identifier **`V`** at version `2.3.0`:
|
||||
<dl>
|
||||
<dt><a id="VTc" href="#VTc"><pre>VTc(u32 taskid, u32 typeid)</pre></a></dt>
|
||||
<dd>creates task %{taskid} with type %{typeid}</dd>
|
||||
@ -660,13 +648,9 @@ List of events for the model *nosv* with identifier **`V`** at version `2.6.0`:
|
||||
<dt><a id="VSf" href="#VSf"><pre>VSf</pre></a></dt>
|
||||
<dd>is no longer hungry</dd>
|
||||
<dt><a id="VS[" href="#VS["><pre>VS[</pre></a></dt>
|
||||
<dd>enters scheduler server blocking mode</dd>
|
||||
<dd>enters scheduler server mode</dd>
|
||||
<dt><a id="VS]" href="#VS]"><pre>VS]</pre></a></dt>
|
||||
<dd>leaves scheduler server blocking mode</dd>
|
||||
<dt><a id="VSN" href="#VSN"><pre>VSN</pre></a></dt>
|
||||
<dd>enters scheduler server non-blocking mode</dd>
|
||||
<dt><a id="VSn" href="#VSn"><pre>VSn</pre></a></dt>
|
||||
<dd>leaves scheduler server non-blocking mode</dd>
|
||||
<dd>leaves scheduler server mode</dd>
|
||||
<dt><a id="VU[" href="#VU["><pre>VU[</pre></a></dt>
|
||||
<dd>starts submitting a task</dd>
|
||||
<dt><a id="VU]" href="#VU]"><pre>VU]</pre></a></dt>
|
||||
@ -731,18 +715,6 @@ List of events for the model *nosv* with identifier **`V`** at version `2.6.0`:
|
||||
<dd>enters nosv_barrier_wait()</dd>
|
||||
<dt><a id="VAB" href="#VAB"><pre>VAB</pre></a></dt>
|
||||
<dd>leaves nosv_barrier_wait()</dd>
|
||||
<dt><a id="VAo" href="#VAo"><pre>VAo</pre></a></dt>
|
||||
<dd>enters nosv_cond_wait()</dd>
|
||||
<dt><a id="VAO" href="#VAO"><pre>VAO</pre></a></dt>
|
||||
<dd>leaves nosv_cond_wait()</dd>
|
||||
<dt><a id="VAg" href="#VAg"><pre>VAg</pre></a></dt>
|
||||
<dd>enters nosv_cond_signal()</dd>
|
||||
<dt><a id="VAG" href="#VAG"><pre>VAG</pre></a></dt>
|
||||
<dd>leaves nosv_cond_signal()</dd>
|
||||
<dt><a id="VAk" href="#VAk"><pre>VAk</pre></a></dt>
|
||||
<dd>enters nosv_cond_broadcast()</dd>
|
||||
<dt><a id="VAK" href="#VAK"><pre>VAK</pre></a></dt>
|
||||
<dd>leaves nosv_cond_broadcast()</dd>
|
||||
<dt><a id="VHa" href="#VHa"><pre>VHa</pre></a></dt>
|
||||
<dd>enters nosv_attach()</dd>
|
||||
<dt><a id="VHA" href="#VHA"><pre>VHA</pre></a></dt>
|
||||
@ -761,6 +733,4 @@ List of events for the model *nosv* with identifier **`V`** at version `2.6.0`:
|
||||
<dd>sets progress state to Resting</dd>
|
||||
<dt><a id="VPa" href="#VPa"><pre>VPa</pre></a></dt>
|
||||
<dd>sets progress state to Absorbing</dd>
|
||||
<dt><a id="VWC" href="#VWC"><pre>VWC+(i64 value)</pre></a></dt>
|
||||
<dd>set hardware counters (first %{value})</dd>
|
||||
</dl>
|
||||
|
||||
@ -104,32 +104,3 @@ of CPUs in that state, not the physical CPUs like other views.
|
||||
Here is an example of the Heat mini-app:
|
||||
|
||||

|
||||
|
||||
## Hardware counters (HWC) view
|
||||
|
||||
The hardware counter view allows you to see the *delta* of a given set of
|
||||
hardware counters over time. The counters are read at the beginning and end of
|
||||
tasks as well as at some nOS-V API methods.
|
||||
|
||||
To enable support for HWC in nOS-V use at least level 2 in `ovni.level` or
|
||||
enable the "hwc" event set in nosv.toml. Then, make sure the
|
||||
`hwcounters.backend` option is set to "papi" and select the counters you want to
|
||||
enable in `hwcounters.papi_events`. Here is an example to trace total
|
||||
instructions and cycles:
|
||||
|
||||
```
|
||||
instrumentation.version = "ovni"
|
||||
ovni.level = 2
|
||||
hwcounters.backend = "papi"
|
||||
hwcounters.papi_events = [ "PAPI_TOT_INS", "PAPI_TOT_CYC" ]
|
||||
```
|
||||
|
||||
You can use the `papi_avail` tool to see which counters are available for a
|
||||
particular machine and a description of each counter. Each CPU has a limit in
|
||||
how many counters can be enabled at the same time, reported in the *Number
|
||||
Hardware Counters* line.
|
||||
|
||||
The events for HWC are generated in cpu.prv and thread.prv for CPUs and threads,
|
||||
respectively. For each enabled hardware counter, a new configuration file will
|
||||
be created at `cfg/cpu/nosv/hwc-*.cfg` and `cfg/thread/nosv/hwc-*.cfg` with the
|
||||
corresponding name of the counter.
|
||||
|
||||
@ -8,20 +8,21 @@ refer to the
|
||||
|
||||
The [LLVM OpenMP Runtime](https://openmp.llvm.org/design/Runtimes.html) provides
|
||||
an implementation of the OpenMP specification as a component of the LLVM
|
||||
compiler infrastructure. We have modified the LLVM OpenMP runtime (libomp) to run on top
|
||||
compiler infrastructure. We have modified the LLVM OpenMP runtime to run on top
|
||||
of the [nOS-V](https://gitlab.bsc.es/nos-v/nos-v) runtime as part of the
|
||||
[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **libompv**.
|
||||
[OmpSs-2 LLVM compiler](https://pm.bsc.es/llvm-ompss), named **OpenMP-V**.
|
||||
|
||||
We have added instrumentation events to libompv designed to be enabled along
|
||||
We have added instrumentation events to OpenMP-V designed to be enabled along
|
||||
the [nOS-V instrumentation](nosv.md). This document describes all the
|
||||
instrumentation features included in our modified libompv runtime to identify
|
||||
instrumentation features included in our modified OpenMP-V runtime to identify
|
||||
what is happening. This data is useful for both users and developers of the
|
||||
OpenMP runtime to analyze issues and undesired behaviors.
|
||||
|
||||
!!! Note
|
||||
|
||||
Instrumenting libomp is planned but is not yet possible.
|
||||
For now you must use libompv.
|
||||
Instrumenting the original OpenMP runtime from the LLVM project is planned
|
||||
but is not yet possible. For now you must use the modified OpenMP-V runtime
|
||||
with nOS-V.
|
||||
|
||||
## Enable the instrumentation
|
||||
|
||||
@ -32,25 +33,25 @@ To generate runtime traces, you will have to:
|
||||
documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md).
|
||||
Typically you should use the `--with-ovni` option at configure time to specify
|
||||
where ovni is installed.
|
||||
2. **Build libompv with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
|
||||
2. **Build OpenMP-V with ovni and nOS-V support:** Use the `PKG_CONFIG_PATH`
|
||||
environment variable to specify the nOS-V and ovni installation
|
||||
when configuring CMake.
|
||||
3. **Enable the instrumentation in nOS-V at runtime:** Refer to the
|
||||
[nOS-V documentation](https://github.com/bsc-pm/nos-v/blob/master/docs/user/tracing.md)
|
||||
to find out how to enable the tracing at runtime. Typically you can just set
|
||||
`NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni"`.
|
||||
4. **Enable the instrumentation of libompv at runtime:** Set the environment
|
||||
4. **Enable the instrumentation of OpenMP-V at runtime:** Set the environment
|
||||
variable `OMP_OVNI=1`.
|
||||
|
||||
The next sections describe each of the views included for analysis.
|
||||
Currently there is only support for the subsystem view, which is documented
|
||||
below. The view is complemented with the information of [nOS-V views](nosv.md),
|
||||
as OpenMP-V uses nOS-V tasks to run the workers.
|
||||
|
||||
## Subsystem view
|
||||
|
||||

|
||||
|
||||
The view is complemented with the information of [nOS-V views](nosv.md),
|
||||
as libompv uses nOS-V tasks to run the workers.
|
||||
Subsystem illustrates the activities of each thread with different states:
|
||||
This view illustrates the activities of each thread with different states:
|
||||
|
||||
- **Work-distribution subsystem**: Related to work-distribution constructs,
|
||||
[in Chapter 11][workdis].
|
||||
@ -134,9 +135,9 @@ Subsystem illustrates the activities of each thread with different states:
|
||||
- **Fork call**: Preparing a parallel section using the fork-join model.
|
||||
Only called from the master thread.
|
||||
|
||||
- **Init**: Initializing the libompv runtime.
|
||||
- **Init**: Initializing the OpenMP-V runtime.
|
||||
|
||||
- **Internal microtask**: Running an internal libompv function as a microtask.
|
||||
- **Internal microtask**: Running an internal OpenMP-V function as a microtask.
|
||||
|
||||
- **User microtask**: Running user code as a microtask in a worker thread.
|
||||
|
||||
@ -155,42 +156,9 @@ Subsystem illustrates the activities of each thread with different states:
|
||||
[critical]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.2
|
||||
[barrier]: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#section.15.3
|
||||
|
||||
## Label view
|
||||
|
||||
The label view displays the text in the `label()` clause of OpenMP
|
||||
tasks and work distribution constructs (static and dynamic for, single
|
||||
and section). When the label is not provided, the source file and source
|
||||
line location is used instead.
|
||||
|
||||
When nesting multiple tasks or work distribution constructs, only the
|
||||
innermost label is shown.
|
||||
|
||||
Note that in this view, the numeric event value is a hash function of
|
||||
the type label, so two distinct tasks (declared in different parts of
|
||||
the code) with the same label will share the event value and have the
|
||||
same color.
|
||||
|
||||
## Task ID view
|
||||
|
||||
The task ID view represents the numeric ID of the OpenMP task that is
|
||||
currently running on each thread. The ID is a monotonically increasing
|
||||
identifier assigned on task creation. Lower IDs correspond to tasks
|
||||
created at an earlier point than higher IDs.
|
||||
|
||||
# Breakdown (simple)
|
||||
|
||||
A simplified view for the breakdown is generated when the emulator is run with
|
||||
the `-b` flag, the trace is stored in `openmp-breakdown.prv`. This breakdown
|
||||
view selects the label when it has a value or the subsystem otherwise. The view
is sorted so that rows with the same values are grouped together.
|
||||
is sorted so that rows with same values are grouped together.
|
||||
|
||||
Notice that unlike nOS-V or Nanos6, we don't yet include the information about
|
||||
the runtime waiting or making progress, but some information can be inferred
|
||||
from the subsystem states.
|
||||
|
||||
## Limitations
|
||||
|
||||
As the compiler generates the code that performs the calls to the libompv
|
||||
As the compiler generates the code that performs the calls to the OpenMP-V
|
||||
runtime, there are some parts of the execution that are complicated to
|
||||
instrument by just placing a pair of events to delimit a function.
|
||||
|
||||
|
||||
@ -33,10 +33,6 @@ Track changes in emulator model versions.
|
||||
|
||||
## OpenMP
|
||||
|
||||
- openmp 1.2.1:
|
||||
- Fix task events so they don't overlap with re-executions
|
||||
- openmp 1.2.0:
|
||||
- Add support for labels and task ID views
|
||||
- openmp 1.1.0: Initial version
|
||||
|
||||
## TAMPI
|
||||
@ -45,14 +41,6 @@ Track changes in emulator model versions.
|
||||
|
||||
## nOS-V
|
||||
|
||||
- nosv 2.6.0
|
||||
- Add support for hardware counters event `VWC`.
|
||||
- nosv 2.5.1
|
||||
- Remove task complete callback from between `VTx` and `VTe` events.
|
||||
- nosv 2.5.0
|
||||
- Add support for non-blocking scheduler server events `VS{Nn}`.
|
||||
- nosv 2.4.0
|
||||
- Add support for `nosv_cond_wait`, `nosv_cond_signal` and `nosv_cond_broadcast` events `VA{oOgGkK}`.
|
||||
- nosv 2.3.0
|
||||
- Add `nosv.can_breakdown` attribute to metadata for breakdown checks.
|
||||
- nosv 2.2.0
|
||||
|
||||
@ -9,7 +9,7 @@ the same node use the same environment variables.
|
||||
During the execution of your program, a per-thread buffer is kept where the new
|
||||
events are being recorded. When this buffer is full, it is written to disk and
|
||||
emptied, an operation known as flush. This may take a while depending on the
|
||||
underlying filesystem.
|
||||
underliying filesystem.
|
||||
|
||||
Keep in mind that the thread will be blocked until the flush ends, so if your
|
||||
filesystem is slow it would interrupt the execution of your program for a long
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
# Introduction
|
||||
|
||||
To use *libovni* to instrument a program follow the next instructions
|
||||
carefully or you may end up with an incomplete trace that is rejected at
|
||||
To use *libovni* to instrument a program, follow the next instructions
|
||||
carefully, or you may end up with an incomplete trace that is rejected at
|
||||
emulation.
|
||||
|
||||
You can also generate a valid trace from your own software or hardware
|
||||
directly following the [trace specification](trace_spec.md).
|
||||
directly, but be sure to follow the [trace specification](trace_spec.md).
|
||||
|
||||
## Initialization
|
||||
|
||||
@ -21,7 +21,6 @@ To initialize libovni follow these steps in all threads:
|
||||
|
||||
3. **Init the thread**. Call `ovni_thread_init()` to initialize the thread.
|
||||
Multiple attempts to initialize the same thread are ignored with a warning.
|
||||
Must be called by all threads.
|
||||
|
||||
The `ovni_proc_init()` arguments are as follows:
|
||||
|
||||
@ -33,7 +32,7 @@ The `app` defines the "appid" of the program, which must be a number >0. This is
|
||||
useful to run multiple processes some of which run the same "app", so you can
|
||||
tell which one is which. The `loom` argument defines the
|
||||
[loom](../concepts/part-model.md#loom) name and maps the process to that
|
||||
loom. It must be composed of the host name, a dot and a suffix. The PID is the
|
||||
loom. It must be compose of the host name, a dot and a suffix. The PID is the
|
||||
one obtained by `getpid(2)`.
|
||||
|
||||
The `ovni_thread_init()` function only accepts one argument, the TID as returned
|
||||
@ -46,8 +45,7 @@ the thread stream.
|
||||
|
||||
1. **Require models**. Call `ovni_thread_require()` with the required model
|
||||
version before emitting events for a given model. Only required once from a
|
||||
thread in a given trace. The `ovni` model is implicitly required when calling
|
||||
`ovni_thread_init()`, so there is no need to add it again.
|
||||
thread in a given trace.
|
||||
|
||||
2. **Emit loom CPUs**. Call `ovni_add_cpu()` to register each CPU in the loom. It can
|
||||
be done from a single thread or multiple threads, in the latter the list of
|
||||
@ -56,18 +54,6 @@ the thread stream.
|
||||
3. **Set the rank**. If you use MPI, call `ovni_proc_set_rank()` to register the
|
||||
rank and number of ranks of the current execution. Only once per process.
|
||||
|
||||
When emitting the CPUs with:
|
||||
|
||||
```c
|
||||
void ovni_add_cpu(int index, int phyid);
|
||||
```
|
||||
|
||||
The `index` will be used to identify the CPU in the loom and goes from 0 to N -
|
||||
1, where N is the number of CPUs in the loom. It must match the index that is
|
||||
used in affinity events when a thread switches to another CPU. The `phyid` is
|
||||
only displayed in Paraver and is usually the same as the index, but it can be
|
||||
different if there are multiple looms per node.
|
||||
|
||||
## Start the execution
|
||||
|
||||
The current thread must switch to the "Running" state before any event can be
|
||||
@ -97,10 +83,10 @@ set to -1.
|
||||
|
||||
After this point you can emit any other event from this thread. Use the
|
||||
`ovni_ev_*` set of functions to create and emit events. Notice that all events
|
||||
refer to the current thread that emits them.
|
||||
are refer to the current thread that emits them.
|
||||
|
||||
If you need to store metadata information, use the `ovni_attr_*` set of
|
||||
functions. The metadata is stored in disk by `ovni_attr_flush()` and when the
|
||||
functions. The metadata is stored in disk by `ovni_attr_fluch()` and when the
|
||||
thread is freed by `ovni_thread_free()`.
|
||||
|
||||
Attempting to emit events or writing metadata without having a thread
|
||||
@ -120,4 +106,4 @@ otherwise the trace **will be rejected**.
|
||||
to set the process state to finished.
|
||||
|
||||
If a thread fails to perform these steps, the complete trace will be rejected by
|
||||
the emulator as it cannot guarantee it to be consistent.
|
||||
the emulator as it cannot guarantee the trace to be consistent.
|
||||
|
||||
20
flake.lock
generated
20
flake.lock
generated
@ -7,11 +7,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1757680122,
|
||||
"narHash": "sha256-V8sN1npm8aE4o0CJROU4B7cUUzkla21wn2jud3F/pPU=",
|
||||
"lastModified": 1705310446,
|
||||
"narHash": "sha256-PaPnkGotb2omIV6OsS72MGkqNN6Q/iHLlXQZ6S3vWOY=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "c7b5ec13b8d596a79942e5bd18ea7049472613b9",
|
||||
"revCount": 984,
|
||||
"rev": "3b21a32d835ff06741d5d59cd023ff2ae1ecb19f",
|
||||
"revCount": 932,
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
},
|
||||
@ -22,16 +22,14 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1736867362,
|
||||
"narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
|
||||
"type": "github"
|
||||
"lastModified": 1693663421,
|
||||
"narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=",
|
||||
"path": "/nix/store/wl5m5xfayd69ycyspzyd4rilfgl6wmh0-source",
|
||||
"rev": "e56990880811a451abd32515698c712788be5720",
|
||||
"type": "path"
|
||||
},
|
||||
"original": {
|
||||
"id": "nixpkgs",
|
||||
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
|
||||
44
flake.nix
44
flake.nix
@ -1,5 +1,5 @@
|
||||
{
|
||||
inputs.nixpkgs.url = "nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc";
|
||||
inputs.nixpkgs.url = "nixpkgs";
|
||||
inputs.bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
||||
inputs.bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
|
||||
|
||||
@ -15,49 +15,30 @@
|
||||
nosv = prev.nosv.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "efed594c3fdf0a2dd35ef98c9c941c896b4c65f6";
|
||||
gitCommit = "3286ff5a788a989407519dd1dfe57c1750258d3f";
|
||||
};
|
||||
nanos6 = prev.nanos6.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "f39ea57c67a613d098050e2bb251116a021e91e5";
|
||||
gitCommit = "21fccec383a4136daf5919093a6ffcdc8c139bfe";
|
||||
};
|
||||
nodes = prev.nodes.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "c97d7ca6f885500121a94c75df429c788e8d6cf8";
|
||||
gitCommit = "70ce0ed0a20842d8eb3124aa5db5916fb6fc238f";
|
||||
};
|
||||
clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "6ea3824988abf00ead8084994a922182bf5fd8ba";
|
||||
gitCommit = "b813108e2810c235480688ed7d1b0f1faf76e804";
|
||||
};
|
||||
mpi = prev.mpich;
|
||||
bench6 = prev.bench6.overrideAttrs (old: rec {
|
||||
src = builtins.fetchGit {
|
||||
url = "ssh://git@bscpm04.bsc.es/rarias/bench6.git";
|
||||
ref = "master";
|
||||
rev = "14227b1aa5a17deb5b746eb9648de2eb89fe3521";
|
||||
};
|
||||
version = src.shortRev;
|
||||
cmakeFlags = [ "-DCMAKE_C_COMPILER=clang" "-DCMAKE_CXX_COMPILER=clang++" ];
|
||||
env = with final; {
|
||||
NANOS6_HOME = nanos6;
|
||||
NODES_HOME = nodes;
|
||||
NOSV_HOME = nosv;
|
||||
};
|
||||
buildInputs = with final; [ bigotes cmake clangOmpss2 openmp openmpv
|
||||
nanos6 nodes nosv mpi tampi tagaspi gpi-2 openblas openblas.dev ovni
|
||||
];
|
||||
hardeningDisable = [ "all" ];
|
||||
dontStrip = true;
|
||||
});
|
||||
|
||||
# Use a fixed commit for libovni
|
||||
ovniFixed = prev.ovni.override {
|
||||
useGit = true;
|
||||
gitBranch = "master";
|
||||
gitCommit = "3bbfe0f0ecdf58e3f46ebafdf2540680f990b76b";
|
||||
# Includes ovni_attr_* API
|
||||
gitCommit = "d1e8a62396ae92934c0b6e248d5f6ff921bef56f";
|
||||
};
|
||||
# Build with the current source
|
||||
ovniLocal = prev.ovni.overrideAttrs (old: rec {
|
||||
@ -78,15 +59,14 @@
|
||||
};
|
||||
compilerList = with pkgs; [
|
||||
#gcc49Stdenv # broken
|
||||
#gcc6Stdenv # deprecated
|
||||
#gcc7Stdenv # deprecated
|
||||
#gcc8Stdenv # deprecated
|
||||
gcc6Stdenv
|
||||
gcc7Stdenv
|
||||
gcc8Stdenv
|
||||
gcc9Stdenv
|
||||
gcc10Stdenv
|
||||
gcc11Stdenv
|
||||
gcc12Stdenv
|
||||
gcc13Stdenv
|
||||
gcc14Stdenv
|
||||
];
|
||||
lib = pkgs.lib;
|
||||
in {
|
||||
@ -136,9 +116,7 @@
|
||||
# We need to be able to exit the chroot to run Nanos6 tests, as they
|
||||
# require access to /sys for hwloc
|
||||
__noChroot = true;
|
||||
buildInputs = old.buildInputs ++ (with pkgs; [
|
||||
pkg-config nosv nanos6 nodes openmpv bench6
|
||||
]);
|
||||
buildInputs = old.buildInputs ++ (with pkgs; [ pkg-config nosv nanos6 nodes openmpv ]);
|
||||
cmakeFlags = old.cmakeFlags ++ [ "-DENABLE_ALL_TESTS=ON" ];
|
||||
preConfigure = old.preConfigure or "" + ''
|
||||
export NOSV_HOME="${pkgs.nosv}"
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
include_directories(
|
||||
@ -39,7 +39,6 @@ add_library(emu STATIC
|
||||
pv/prv.c
|
||||
pv/pvt.c
|
||||
pv/cfg.c
|
||||
pv/cfg_file.c
|
||||
recorder.c
|
||||
system.c
|
||||
task.c
|
||||
@ -54,9 +53,8 @@ add_library(emu STATIC
|
||||
nanos6/event.c
|
||||
nanos6/breakdown.c
|
||||
nosv/breakdown.c
|
||||
nosv/event.c
|
||||
nosv/hwc.c
|
||||
nosv/setup.c
|
||||
nosv/event.c
|
||||
nodes/setup.c
|
||||
nodes/event.c
|
||||
mpi/setup.c
|
||||
@ -67,7 +65,6 @@ add_library(emu STATIC
|
||||
kernel/event.c
|
||||
openmp/setup.c
|
||||
openmp/event.c
|
||||
openmp/breakdown.c
|
||||
)
|
||||
target_link_libraries(emu ovni-static)
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef EMU_PRV_H
|
||||
@ -33,14 +33,9 @@ enum emu_prv_types {
|
||||
PRV_NANOS6_BREAKDOWN = 41,
|
||||
PRV_KERNEL_CS = 45,
|
||||
PRV_OPENMP_SUBSYSTEM = 50,
|
||||
PRV_OPENMP_LABEL = 51,
|
||||
PRV_OPENMP_TASKID = 52,
|
||||
PRV_OPENMP_BREAKDOWN = 53,
|
||||
PRV_OVNI_MARK = 100,
|
||||
/* User marks [100, 200) */
|
||||
PRV_NOSV_HWC = 200,
|
||||
/* nOS-V HWC [200, 300) */
|
||||
PRV_RESERVED = 300,
|
||||
PRV_RESERVED = 200,
|
||||
};
|
||||
|
||||
#endif /* EMU_PRV_H */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "loom.h"
|
||||
@ -97,13 +97,6 @@ load_cpus(struct loom *loom, JSON_Object *meta)
|
||||
int index = (int) json_object_get_number(jcpu, "index");
|
||||
int phyid = (int) json_object_get_number(jcpu, "phyid");
|
||||
|
||||
/* The index can exceed ncpus-1 when CPUs are partially
|
||||
* defined, but it cannot be negative. */
|
||||
if (index < 0) {
|
||||
err("cpu index %d out of bounds", index);
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct cpu *cpu = loom_find_cpu(loom, phyid);
|
||||
|
||||
if (cpu) {
|
||||
@ -181,14 +174,13 @@ loom_find_cpu(struct loom *loom, int phyid)
|
||||
struct cpu *
|
||||
loom_get_cpu(struct loom *loom, int index)
|
||||
{
|
||||
if (loom->cpus_array == NULL)
|
||||
die("cpus_array not yet populated");
|
||||
|
||||
if (index == -1)
|
||||
return &loom->vcpu;
|
||||
|
||||
if (index < 0 || (size_t) index >= loom->ncpus)
|
||||
if (index < 0 || (size_t) index >= loom->ncpus) {
|
||||
err("cpu index out of bounds");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return loom->cpus_array[index];
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "nanos6_priv.h"
|
||||
@ -313,7 +313,7 @@ model_nanos6_create(struct emu *emu)
|
||||
extend_set(&emu->ext, model_id, e);
|
||||
|
||||
if (model_nanos6_breakdown_create(emu) != 0) {
|
||||
err("model_nanos6_breakdown_create failed");
|
||||
err("model_nanos6_breakdown_connect failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "breakdown.h"
|
||||
@ -49,49 +49,36 @@ create_cpu(struct bay *bay, struct nosv_breakdown_cpu *bcpu, int64_t gindex)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
check_thread_metadata(struct thread *th)
|
||||
{
|
||||
if (th->meta == NULL) {
|
||||
err("thread has no metadata");
|
||||
return -1;
|
||||
}
|
||||
|
||||
JSON_Value *val = json_object_dotget_value(th->meta, "nosv.can_breakdown");
|
||||
if (val == NULL) {
|
||||
err("missing nosv.can_breakdown attribute");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!json_value_get_boolean(val)) {
|
||||
err("nosv.can_breakdown is false, missing events to enable breakdown");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_nosv_breakdown_create(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *memu = EXT(emu, 'V');
|
||||
struct nosv_breakdown_emu *bemu = &memu->breakdown;
|
||||
size_t nbreakdown = 0;
|
||||
|
||||
/* Stop here if breakdown not enabled */
|
||||
if (emu->args.breakdown == 0)
|
||||
return 0;
|
||||
|
||||
/* We need to make sure that *all* threads have the can_breakdown key
|
||||
* present and set to true, as otherwise we may be missing some data in
|
||||
* the breakdown view. */
|
||||
for (struct thread *th = emu->system.threads; th; th = th->gnext) {
|
||||
/* All threads must have the can_breakdown key */
|
||||
JSON_Value *val = json_object_dotget_value(th->meta,
|
||||
"nosv.can_breakdown");
|
||||
|
||||
if (val == NULL) {
|
||||
err("missing nosv.can_breakdown key: %s", th->id);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (json_value_get_type(val) != JSONBoolean) {
|
||||
err("expected boolean nosv.can_breakdown key: %s", th->id);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Count how many they have it enabled */
|
||||
if (json_value_get_boolean(val))
|
||||
nbreakdown++;
|
||||
}
|
||||
|
||||
/* Enable breakdown if all threads can produce a breakdown trace */
|
||||
if (nbreakdown != emu->system.nthreads) {
|
||||
warn("cannot enable breakdown for nOS-V model (suitable %zd/%zd)",
|
||||
nbreakdown, emu->system.nthreads);
|
||||
return 0;
|
||||
}
|
||||
|
||||
info("enabling breakdown for nOS-V model");
|
||||
bemu->enabled = 1;
|
||||
struct nosv_emu *memu = EXT(emu, 'V');
|
||||
struct nosv_breakdown_emu *bemu = &memu->breakdown;
|
||||
|
||||
/* Count phy cpus */
|
||||
struct system *sys = &emu->system;
|
||||
@ -124,6 +111,13 @@ model_nosv_breakdown_create(struct emu *emu)
|
||||
}
|
||||
}
|
||||
|
||||
for (struct thread *th = emu->system.threads; th; th = th->gnext) {
|
||||
if (check_thread_metadata(th) != 0) {
|
||||
err("bad nosv metadata in thread: %s", th->id);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -247,10 +241,6 @@ model_nosv_breakdown_connect(struct emu *emu)
|
||||
|
||||
struct nosv_emu *memu = EXT(emu, 'V');
|
||||
struct nosv_breakdown_emu *bemu = &memu->breakdown;
|
||||
|
||||
if (!bemu->enabled)
|
||||
return 0;
|
||||
|
||||
struct bay *bay = &emu->bay;
|
||||
struct system *sys = &emu->system;
|
||||
|
||||
@ -300,10 +290,6 @@ model_nosv_breakdown_finish(struct emu *emu,
|
||||
|
||||
struct nosv_emu *memu = EXT(emu, 'V');
|
||||
struct nosv_breakdown_emu *bemu = &memu->breakdown;
|
||||
|
||||
if (!bemu->enabled)
|
||||
return 0;
|
||||
|
||||
struct pcf *pcf = pvt_get_pcf(bemu->pvt);
|
||||
long typeid = PRV_NOSV_BREAKDOWN;
|
||||
char label[] = "CPU: nOS-V Runtime/Idle/Task breakdown";
|
||||
|
||||
@ -51,7 +51,6 @@ struct nosv_breakdown_emu {
|
||||
int64_t nphycpus;
|
||||
struct sort sort;
|
||||
struct pvt *pvt;
|
||||
int enabled;
|
||||
};
|
||||
|
||||
#endif /* BREAKDOWN_H */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "nosv_priv.h"
|
||||
@ -26,8 +26,6 @@ static const int ss_table[256][256][3] = {
|
||||
['f'] = { CHSS, POP, ST_SCHED_HUNGRY },
|
||||
['['] = { CHSS, PUSH, ST_SCHED_SERVING },
|
||||
[']'] = { CHSS, POP, ST_SCHED_SERVING },
|
||||
['N'] = { CHSS, PUSH, ST_SCHED_SERVING }, /* Non-block */
|
||||
['n'] = { CHSS, POP, ST_SCHED_SERVING },
|
||||
['@'] = { CHSS, IGN, -1 },
|
||||
['r'] = { CHSS, IGN, -1 },
|
||||
['s'] = { CHSS, IGN, -1 },
|
||||
@ -69,12 +67,6 @@ static const int ss_table[256][256][3] = {
|
||||
['U'] = { CHSS, POP, ST_API_MUTEX_UNLOCK },
|
||||
['b'] = { CHSS, PUSH, ST_API_BARRIER_WAIT },
|
||||
['B'] = { CHSS, POP, ST_API_BARRIER_WAIT },
|
||||
['o'] = { CHSS, PUSH, ST_API_COND_WAIT },
|
||||
['O'] = { CHSS, POP, ST_API_COND_WAIT },
|
||||
['g'] = { CHSS, PUSH, ST_API_COND_SIGNAL },
|
||||
['G'] = { CHSS, POP, ST_API_COND_SIGNAL },
|
||||
['k'] = { CHSS, PUSH, ST_API_COND_BCAST },
|
||||
['K'] = { CHSS, POP, ST_API_COND_BCAST },
|
||||
},
|
||||
/* FIXME: Move thread type to another channel, like nanos6 */
|
||||
['H'] = {
|
||||
@ -596,8 +588,6 @@ process_ev(struct emu *emu)
|
||||
return pre_task(emu);
|
||||
case 'Y':
|
||||
return pre_type(emu);
|
||||
case 'W':
|
||||
return hwc_event(emu);
|
||||
default:
|
||||
err("unknown nOS-V event category");
|
||||
return -1;
|
||||
|
||||
@ -1,545 +0,0 @@
|
||||
#include "hwc.h"
|
||||
|
||||
#include "chan.h"
|
||||
#include "cpu.h"
|
||||
#include "emu.h"
|
||||
#include "emu_ev.h"
|
||||
#include "emu_prv.h"
|
||||
#include "inttypes.h"
|
||||
#include "nosv_priv.h"
|
||||
#include "ovni.h"
|
||||
#include "parson.h"
|
||||
#include "proc.h"
|
||||
#include "pv/cfg_file.h"
|
||||
#include "pv/pcf.h"
|
||||
#include "pv/prv.h"
|
||||
#include "pv/pvt.h"
|
||||
#include "thread.h"
|
||||
#include "track.h"
|
||||
#include "uthash.h"
|
||||
#include <errno.h>
|
||||
|
||||
static int
|
||||
parse_hwc(struct nosv_hwc_emu *hwc_emu, const char *indexstr, JSON_Value *hwcval)
|
||||
{
|
||||
errno = 0;
|
||||
char *endptr = NULL;
|
||||
size_t index = (size_t) strtol(indexstr, &endptr, 10);
|
||||
|
||||
if (errno != 0 || endptr == indexstr || endptr[0] != '\0') {
|
||||
err("failed to parse hwc index: %s", indexstr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (index >= 100) {
|
||||
err("hwc index should be in [0, 100) range: %zd", index);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (index >= hwc_emu->n) {
|
||||
err("hwc index %zd exceeds allocated counters %zd",
|
||||
index, hwc_emu->n);
|
||||
return -1;
|
||||
}
|
||||
|
||||
JSON_Object *hwc = json_value_get_object(hwcval);
|
||||
if (hwc == NULL) {
|
||||
err("json_value_get_object() failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char *name = json_object_get_string(hwc, "name");
|
||||
if (name == NULL) {
|
||||
err("json_object_get_string() for name failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hwc_emu->name[index] == NULL) {
|
||||
hwc_emu->name[index] = strdup(name);
|
||||
if (hwc_emu->name[index] == NULL) {
|
||||
err("strdup failed");
|
||||
return -1;
|
||||
}
|
||||
dbg("got hwc with index %zd and name %s", index, hwc_emu->name[index]);
|
||||
} else if (strcmp(hwc_emu->name[index], name) != 0) {
|
||||
err("hwc at %zd already defined as %s",
|
||||
index, hwc_emu->name[index]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
scan_thread(struct nosv_hwc_emu *hwc_emu, struct thread *t)
|
||||
{
|
||||
JSON_Object *obj = json_object_dotget_object(t->meta, "nosv.hwc");
|
||||
|
||||
/* No HWC in this thread */
|
||||
if (obj == NULL)
|
||||
return 0;
|
||||
|
||||
size_t n = json_object_get_count(obj);
|
||||
|
||||
/* Ignore empty dictionaries */
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
if (hwc_emu->n == 0) {
|
||||
hwc_emu->name = calloc(n, sizeof(char *));
|
||||
hwc_emu->n = n;
|
||||
} else if (hwc_emu->n != n) {
|
||||
/* We have a mismatch */
|
||||
err("thread %s defines %zd hardware counters, but emu already has %zd",
|
||||
t->id, n, hwc_emu->n);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
const char *indexstr = json_object_get_name(obj, i);
|
||||
if (indexstr == NULL) {
|
||||
err("json_object_get_name failed");
|
||||
return -1;
|
||||
}
|
||||
JSON_Value *hwcval = json_object_get_value_at(obj, i);
|
||||
if (hwcval == NULL) {
|
||||
err("json_object_get_value_at failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (parse_hwc(hwc_emu, indexstr, hwcval) != 0) {
|
||||
err("cannot parse HWC");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
create_thread_chan(struct nosv_hwc_emu *emu, struct bay *bay, struct thread *th)
|
||||
{
|
||||
struct nosv_thread *nosv_thread = EXT(th, 'V');
|
||||
struct nosv_hwc_thread *t = &nosv_thread->hwc;
|
||||
|
||||
/* Create as many channels as required */
|
||||
t->chan = calloc(emu->n, sizeof(struct chan));
|
||||
if (t->chan == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
t->n = emu->n;
|
||||
|
||||
for (size_t i = 0; i < t->n; i++) {
|
||||
struct chan *ch = &t->chan[i];
|
||||
chan_init(ch, CHAN_SINGLE, "nosv.thread%"PRIi64".hwc%zd",
|
||||
th->gindex, i);
|
||||
|
||||
/* Allow duplicates, we can emit the same HWC value twice */
|
||||
chan_prop_set(ch, CHAN_ALLOW_DUP, 1);
|
||||
|
||||
if (bay_register(bay, ch) != 0) {
|
||||
err("bay_register failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Setup tracking */
|
||||
t->track = calloc(t->n, sizeof(struct track));
|
||||
if (t->track == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < t->n; i++) {
|
||||
struct track *track = &t->track[i];
|
||||
/* For now only tracking to active thread is supported */
|
||||
if (track_init(track, bay, TRACK_TYPE_TH, TRACK_TH_ACT,
|
||||
"nosv.thread%"PRIi64".hwc%zd",
|
||||
th->gindex, i) != 0) {
|
||||
err("track_init failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
init_cpu(struct nosv_hwc_emu *emu, struct bay *bay, struct cpu *cpu)
|
||||
{
|
||||
struct nosv_cpu *nosv_cpu = EXT(cpu, 'V');
|
||||
struct nosv_hwc_cpu *c = &nosv_cpu->hwc;
|
||||
|
||||
/* Setup tracking */
|
||||
c->track = calloc(emu->n, sizeof(struct track));
|
||||
if (c->track == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
c->n = emu->n;
|
||||
|
||||
for (size_t i = 0; i < c->n; i++) {
|
||||
struct track *track = &c->track[i];
|
||||
/* For now only tracking to running thread is supported */
|
||||
if (track_init(track, bay, TRACK_TYPE_TH, TRACK_TH_RUN,
|
||||
"nosv.cpu%"PRIi64".hwc%zd",
|
||||
cpu->gindex, i) != 0) {
|
||||
err("track_init failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwc_create(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
for (struct thread *th = emu->system.threads; th; th = th->gnext) {
|
||||
if (scan_thread(hwc_emu, th) != 0) {
|
||||
err("scan_thread failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Early exit if no counters found */
|
||||
if (hwc_emu->n == 0) {
|
||||
dbg("no hwc counters found");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Create a buffer to do aligned reads */
|
||||
hwc_emu->values = calloc(hwc_emu->n, sizeof(int64_t));
|
||||
if (hwc_emu->values == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Once we know how many HWC we have, allocate the channels for threads
|
||||
* and CPUs. */
|
||||
|
||||
for (struct thread *th = emu->system.threads; th; th = th->gnext) {
|
||||
if (create_thread_chan(hwc_emu, &emu->bay, th) != 0) {
|
||||
err("create_thread_chan failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (struct cpu *cpu = emu->system.cpus; cpu; cpu = cpu->next) {
|
||||
if (init_cpu(hwc_emu, &emu->bay, cpu) != 0) {
|
||||
err("init_cpu failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
connect_thread_prv(struct bay *bay, struct thread *systh, struct prv *prv)
|
||||
{
|
||||
struct nosv_thread *nosv_thread = EXT(systh, 'V');
|
||||
struct nosv_hwc_thread *t = &nosv_thread->hwc;
|
||||
|
||||
for (size_t i = 0; i < t->n; i++) {
|
||||
struct track *track = &t->track[i];
|
||||
struct chan *ch = &t->chan[i];
|
||||
struct chan *sel = &systh->chan[TH_CHAN_STATE];
|
||||
|
||||
/* Connect the input and sel channel to the mux */
|
||||
if (track_connect_thread(track, ch, sel, 1) != 0) {
|
||||
err("track_connect_thread failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Then connect the output of the tracking module to the prv
|
||||
* trace for the current thread */
|
||||
struct chan *out = track_get_output(track);
|
||||
long row = (long) systh->gindex;
|
||||
long flags = PRV_SKIPDUPNULL | PRV_ZERO;
|
||||
long prvtype = (long) (PRV_NOSV_HWC + i);
|
||||
if (prv_register(prv, row, prvtype, bay, out, flags)) {
|
||||
err("prv_register failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
init_pcf(struct emu *emu, struct pcf *pcf)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
for (size_t i = 0; i < hwc_emu->n; i++) {
|
||||
long prvtype = (long) (PRV_NOSV_HWC + i);
|
||||
const char *name = hwc_emu->name[i];
|
||||
struct pcf_type *pcftype = pcf_add_type(pcf, (int) prvtype, name);
|
||||
if (pcftype == NULL) {
|
||||
err("pcf_add_type failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
connect_thread(struct emu *emu)
|
||||
{
|
||||
struct pvt *pvt = recorder_find_pvt(&emu->recorder, "thread");
|
||||
if (pvt == NULL) {
|
||||
err("cannot find thread pvt");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Connect thread channels to PRV */
|
||||
struct prv *prv = pvt_get_prv(pvt);
|
||||
for (struct thread *t = emu->system.threads; t; t = t->gnext) {
|
||||
if (connect_thread_prv(&emu->bay, t, prv) != 0) {
|
||||
err("connect_thread_prv failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Init thread PCF */
|
||||
struct pcf *pcf = pvt_get_pcf(pvt);
|
||||
if (init_pcf(emu, pcf) != 0) {
|
||||
err("init_pcf failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
connect_cpu_prv(struct emu *emu, struct cpu *syscpu, struct prv *prv)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
struct nosv_cpu *nosv_cpu = EXT(syscpu, 'V');
|
||||
struct nosv_hwc_cpu *hwc_cpu = &nosv_cpu->hwc;
|
||||
|
||||
for (size_t i = 0; i < hwc_emu->n; i++) {
|
||||
struct track *track = &hwc_cpu->track[i];
|
||||
struct chan *sel = cpu_get_th_chan(syscpu);
|
||||
|
||||
int64_t nthreads = (int64_t) emu->system.nthreads;
|
||||
if (track_set_select(track, sel, NULL, nthreads) != 0) {
|
||||
err("track_select failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Add each thread as input */
|
||||
for (struct thread *t = emu->system.threads; t; t = t->gnext) {
|
||||
struct nosv_thread *nosv_thread = EXT(t, 'V');
|
||||
struct nosv_hwc_thread *hwc_thread = &nosv_thread->hwc;
|
||||
|
||||
/* Use the input thread directly */
|
||||
struct chan *inp = &hwc_thread->chan[i];
|
||||
|
||||
if (track_set_input(track, t->gindex, inp) != 0) {
|
||||
err("track_add_input failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Then connect the output of the tracking module to the prv
|
||||
* trace for the current cpu */
|
||||
struct chan *out = track_get_output(track);
|
||||
long row = (long) syscpu->gindex;
|
||||
long flags = PRV_SKIPDUPNULL | PRV_ZERO;
|
||||
long prvtype = (long) (PRV_NOSV_HWC + i);
|
||||
if (prv_register(prv, row, prvtype, &emu->bay, out, flags)) {
|
||||
err("prv_register failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
connect_cpu(struct emu *emu)
|
||||
{
|
||||
/* Get cpu PRV */
|
||||
struct pvt *pvt = recorder_find_pvt(&emu->recorder, "cpu");
|
||||
if (pvt == NULL) {
|
||||
err("cannot find thread pvt");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Connect cpu channels to PRV */
|
||||
struct prv *prv = pvt_get_prv(pvt);
|
||||
for (struct cpu *cpu = emu->system.cpus; cpu; cpu = cpu->next) {
|
||||
if (connect_cpu_prv(emu, cpu, prv) != 0) {
|
||||
err("connect_cpu_prv failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Init cpu PCF */
|
||||
struct pcf *pcf = pvt_get_pcf(pvt);
|
||||
if (init_pcf(emu, pcf) != 0) {
|
||||
err("init_pcf failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Connect the channels to the output PVTs */
|
||||
int
|
||||
hwc_connect(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
/* No HWC, so nothing to connect */
|
||||
if (hwc_emu->n == 0)
|
||||
return 0;
|
||||
|
||||
if (connect_thread(emu) != 0) {
|
||||
err("connect_thread() failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (connect_cpu(emu) != 0) {
|
||||
err("connect_cpu() failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
event_hwc_count(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
if (!emu->ev->is_jumbo) {
|
||||
err("expecting a jumbo event");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Make sure size matches */
|
||||
size_t array_size = (size_t) emu->ev->payload->jumbo.size;
|
||||
size_t expected_size = hwc_emu->n * sizeof(int64_t);
|
||||
if (array_size != expected_size) {
|
||||
err("unexpected hwc event with jumbo payload size %zd (expecting %zd)",
|
||||
array_size, expected_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Use memcpy to align array */
|
||||
memcpy(hwc_emu->values, &emu->ev->payload->jumbo.data[0], array_size);
|
||||
|
||||
/* Update all HWC channels for the given thread */
|
||||
for (size_t i = 0; i < hwc_emu->n; i++) {
|
||||
struct nosv_thread *nosv_thread = EXT(emu->thread, 'V');
|
||||
struct nosv_hwc_thread *hwc_thread = &nosv_thread->hwc;
|
||||
struct chan *ch = &hwc_thread->chan[i];
|
||||
if (chan_set(ch, value_int64(hwc_emu->values[i])) != 0) {
|
||||
err("chan_set failed for hwc channel %s", ch->name);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwc_event(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
/* Panic on HWC event with no counters */
|
||||
if (hwc_emu->n == 0) {
|
||||
err("got hwc event %s but no counters enabled", emu->ev->mcv);
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (emu->ev->v) {
|
||||
case 'C':
|
||||
return event_hwc_count(emu);
|
||||
default:
|
||||
err("unknown nosv hwc event %s", emu->ev->mcv);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
write_cfg(const char *path, size_t i, const char *fmt, const char *name)
|
||||
{
|
||||
char title[MAX_LABEL];
|
||||
|
||||
/* May truncate silently, but is safe */
|
||||
snprintf(title, MAX_LABEL, fmt, name);
|
||||
|
||||
long type = (long) (PRV_NOSV_HWC + i);
|
||||
|
||||
struct cfg_file cf;
|
||||
cfg_file_init(&cf, type, title);
|
||||
cfg_file_color_mode(&cf, CFG_NGRAD);
|
||||
cfg_file_semantic_thread(&cf, CFG_NEXT_EV_VAL);
|
||||
|
||||
if (cfg_file_write(&cf, path) != 0) {
|
||||
err("cfg_file_write failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwc_finish(struct emu *emu)
|
||||
{
|
||||
struct nosv_emu *nosv_emu = EXT(emu, 'V');
|
||||
struct nosv_hwc_emu *hwc_emu = &nosv_emu->hwc;
|
||||
|
||||
if (hwc_emu->n == 0)
|
||||
return 0;
|
||||
|
||||
/* Write CFG files with HWC names. */
|
||||
|
||||
for (size_t i = 0; i < hwc_emu->n; i++) {
|
||||
const char *dir = emu->args.tracedir;
|
||||
const char *name = hwc_emu->name[i];
|
||||
char path[PATH_MAX];
|
||||
|
||||
/* Create thread configs */
|
||||
if (snprintf(path, PATH_MAX, "%s/cfg/thread/nosv/hwc-%s.cfg", dir, name) >= PATH_MAX) {
|
||||
err("hwc thread cfg path too long");
|
||||
return -1;
|
||||
}
|
||||
if (write_cfg(path, i, "Thread: nOS-V %s of the ACTIVE thread", name) != 0) {
|
||||
err("write_cfg failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Same for CPUs */
|
||||
if (snprintf(path, PATH_MAX, "%s/cfg/cpu/nosv/hwc-%s.cfg", dir, name) >= PATH_MAX) {
|
||||
err("hwc cpu cfg path too long");
|
||||
return -1;
|
||||
}
|
||||
if (write_cfg(path, i, "CPU: nOS-V %s of the RUNNING thread", name) != 0) {
|
||||
err("write_cfg failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,32 +0,0 @@
|
||||
#ifndef HWC_H
|
||||
#define HWC_H
|
||||
|
||||
#include "common.h"
|
||||
|
||||
struct emu;
|
||||
struct chan;
|
||||
|
||||
/* Emulator-wide hardware counter (HWC) state */
struct nosv_hwc_emu {
	char **name;     /* name[i] is the name of the i-th counter */
	size_t n;        /* Number of counters; zero means none enabled */
	int64_t *values; /* Buffer of n values filled from each HWC event */
};
|
||||
|
||||
/* Per-thread hardware counter state */
struct nosv_hwc_thread {
	struct track *track; /* NOTE(review): presumably one tracking module per counter -- confirm */
	struct chan *chan;   /* chan[i] holds the value of the i-th counter */
	size_t n;            /* NOTE(review): presumably the number of counters -- confirm */
};
|
||||
|
||||
/* Per-CPU hardware counter state */
struct nosv_hwc_cpu {
	struct track *track; /* NOTE(review): presumably one tracking module per counter -- confirm */
	size_t n;            /* NOTE(review): presumably the number of counters -- confirm */
};
|
||||
|
||||
USE_RET int hwc_create(struct emu *emu);
|
||||
USE_RET int hwc_connect(struct emu *emu);
|
||||
USE_RET int hwc_event(struct emu *emu);
|
||||
USE_RET int hwc_finish(struct emu *emu);
|
||||
|
||||
#endif /* HWC_H */
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef NOSV_PRIV_H
|
||||
@ -6,10 +6,9 @@
|
||||
|
||||
#include "breakdown.h"
|
||||
#include "emu.h"
|
||||
#include "hwc.h"
|
||||
#include "task.h"
|
||||
#include "model_cpu.h"
|
||||
#include "model_thread.h"
|
||||
#include "task.h"
|
||||
|
||||
/* Private enums */
|
||||
|
||||
@ -45,9 +44,6 @@ enum nosv_ss_values {
|
||||
ST_API_MUTEX_TRYLOCK,
|
||||
ST_API_MUTEX_UNLOCK,
|
||||
ST_API_BARRIER_WAIT,
|
||||
ST_API_COND_WAIT,
|
||||
ST_API_COND_SIGNAL,
|
||||
ST_API_COND_BCAST,
|
||||
ST_WORKER,
|
||||
ST_DELEGATE,
|
||||
|
||||
@ -59,13 +55,11 @@ enum nosv_ss_values {
|
||||
struct nosv_thread {
|
||||
struct model_thread m;
|
||||
struct task_stack task_stack;
|
||||
struct nosv_hwc_thread hwc;
|
||||
};
|
||||
|
||||
struct nosv_cpu {
|
||||
struct model_cpu m;
|
||||
struct nosv_breakdown_cpu breakdown;
|
||||
struct nosv_hwc_cpu hwc;
|
||||
};
|
||||
|
||||
struct nosv_proc {
|
||||
@ -76,7 +70,6 @@ struct nosv_emu {
|
||||
int connected;
|
||||
int event;
|
||||
struct nosv_breakdown_emu breakdown;
|
||||
struct nosv_hwc_emu hwc;
|
||||
};
|
||||
|
||||
enum nosv_progress {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "nosv_priv.h"
|
||||
@ -12,7 +12,6 @@
|
||||
#include "emu_prv.h"
|
||||
#include "ev_spec.h"
|
||||
#include "extend.h"
|
||||
#include "loom.h"
|
||||
#include "model.h"
|
||||
#include "model_chan.h"
|
||||
#include "model_cpu.h"
|
||||
@ -28,7 +27,6 @@
|
||||
#include "thread.h"
|
||||
#include "track.h"
|
||||
#include "value.h"
|
||||
#include "hwc.h"
|
||||
|
||||
static const char model_name[] = "nosv";
|
||||
enum { model_id = 'V' };
|
||||
@ -48,8 +46,7 @@ static struct ev_decl model_evlist[] = {
|
||||
{ "VS@", "self assigns itself a task" },
|
||||
{ "VSh", "enters the hungry state, waiting for work" },
|
||||
{ "VSf", "is no longer hungry" },
|
||||
PAIR_E("VS[", "VS]", "scheduler server blocking mode")
|
||||
PAIR_E("VSN", "VSn", "scheduler server non-blocking mode")
|
||||
PAIR_E("VS[", "VS]", "scheduler server mode")
|
||||
|
||||
PAIR_S("VU[", "VU]", "submitting a task")
|
||||
PAIR_S("VMa", "VMA", "allocating memory")
|
||||
@ -68,9 +65,6 @@ static struct ev_decl model_evlist[] = {
|
||||
PAIR_E("VAt", "VAT", "nosv_mutex_trylock()")
|
||||
PAIR_E("VAu", "VAU", "nosv_mutex_unlock()")
|
||||
PAIR_E("VAb", "VAB", "nosv_barrier_wait()")
|
||||
PAIR_E("VAo", "VAO", "nosv_cond_wait()")
|
||||
PAIR_E("VAg", "VAG", "nosv_cond_signal()")
|
||||
PAIR_E("VAk", "VAK", "nosv_cond_broadcast()")
|
||||
|
||||
/* FIXME: VHA and VHa are not subsystems */
|
||||
{ "VHa", "enters nosv_attach()" },
|
||||
@ -83,14 +77,12 @@ static struct ev_decl model_evlist[] = {
|
||||
{ "VPr", "sets progress state to Resting" },
|
||||
{ "VPa", "sets progress state to Absorbing" },
|
||||
|
||||
{ "VWC+(i64 value)", "set hardware counters (first %{value})" },
|
||||
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
struct model_spec model_nosv = {
|
||||
.name = model_name,
|
||||
.version = "2.6.0",
|
||||
.version = "2.3.0",
|
||||
.evlist = model_evlist,
|
||||
.model = model_id,
|
||||
.create = model_nosv_create,
|
||||
@ -169,9 +161,6 @@ static const struct pcf_value_label nosv_ss_values[] = {
|
||||
{ ST_API_MUTEX_TRYLOCK,"API: Mutex trylock" },
|
||||
{ ST_API_MUTEX_UNLOCK, "API: Mutex unlock" },
|
||||
{ ST_API_BARRIER_WAIT, "API: Barrier wait" },
|
||||
{ ST_API_COND_WAIT, "API: Cond wait" },
|
||||
{ ST_API_COND_SIGNAL, "API: Cond signal" },
|
||||
{ ST_API_COND_BCAST, "API: Cond broadcast" },
|
||||
{ ST_WORKER, "Thread: Worker" },
|
||||
{ ST_DELEGATE, "Thread: Delegate" },
|
||||
{ EV_SCHED_SEND, "EV Scheduler: Send task" },
|
||||
@ -323,11 +312,6 @@ model_nosv_create(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hwc_create(emu) != 0) {
|
||||
err("hwc_create failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -349,11 +333,6 @@ model_nosv_connect(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hwc_connect(emu) != 0) {
|
||||
err("hwc_connect failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (struct thread *th = emu->system.threads; th; th = th->gnext) {
|
||||
struct nosv_thread *mth = EXT(th, model_id);
|
||||
struct chan *idle = &mth->m.ch[CH_IDLE];
|
||||
@ -455,11 +434,6 @@ model_nosv_finish(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hwc_finish(emu) != 0) {
|
||||
err("hwc_finish failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* When running in linter mode perform additional checks */
|
||||
if (emu->args.linter_mode && end_lint(emu) != 0) {
|
||||
err("end_lint failed");
|
||||
|
||||
@ -1,234 +0,0 @@
|
||||
/* Copyright (c) 2024-2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "breakdown.h"
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include "bay.h"
|
||||
#include "chan.h"
|
||||
#include "common.h"
|
||||
#include "cpu.h"
|
||||
#include "emu.h"
|
||||
#include "emu_args.h"
|
||||
#include "emu_prv.h"
|
||||
#include "extend.h"
|
||||
#include "model_cpu.h"
|
||||
#include "mux.h"
|
||||
#include "openmp_priv.h"
|
||||
#include "proc.h"
|
||||
#include "pv/pcf.h"
|
||||
#include "pv/prf.h"
|
||||
#include "pv/prv.h"
|
||||
#include "pv/pvt.h"
|
||||
#include "recorder.h"
|
||||
#include "sort.h"
|
||||
#include "system.h"
|
||||
#include "task.h"
|
||||
#include "track.h"
|
||||
#include "value.h"
|
||||
|
||||
/* Input indexes of the breakdown mux0 */
enum {
	MUX0_LABEL = 0,     /* Input fed by the label channel */
	MUX0_SUBSYSTEM = 1, /* Input fed by the subsystem channel */
};
|
||||
|
||||
static int
|
||||
create_cpu(struct bay *bay, struct breakdown_cpu *bcpu, int64_t gindex)
|
||||
{
|
||||
enum chan_type t = CHAN_SINGLE;
|
||||
chan_init(&bcpu->out, t, "openmp.cpu%"PRIi64".breakdown.out", gindex);
|
||||
|
||||
if (bay_register(bay, &bcpu->out) != 0) {
|
||||
err("bay_register out failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_breakdown_create(struct emu *emu)
|
||||
{
|
||||
if (emu->args.breakdown == 0)
|
||||
return 0;
|
||||
|
||||
struct openmp_emu *memu = EXT(emu, 'P');
|
||||
struct breakdown_emu *bemu = &memu->breakdown;
|
||||
|
||||
/* Count phy cpus */
|
||||
struct system *sys = &emu->system;
|
||||
int64_t nphycpus = (int64_t) (sys->ncpus - sys->nlooms);
|
||||
bemu->nphycpus = nphycpus;
|
||||
|
||||
/* Create a new Paraver trace */
|
||||
struct recorder *rec = &emu->recorder;
|
||||
bemu->pvt = recorder_add_pvt(rec, "openmp-breakdown", (long) nphycpus);
|
||||
if (bemu->pvt == NULL) {
|
||||
err("recorder_add_pvt failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sort_init(&bemu->sort, &emu->bay, nphycpus, "openmp.breakdown.sort") != 0) {
|
||||
err("sort_init failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (struct cpu *cpu = sys->cpus; cpu; cpu = cpu->next) {
|
||||
if (cpu->is_virtual)
|
||||
continue;
|
||||
|
||||
struct openmp_cpu *mcpu = EXT(cpu, 'P');
|
||||
struct breakdown_cpu *bcpu = &mcpu->breakdown;
|
||||
|
||||
if (create_cpu(&emu->bay, bcpu, cpu->gindex) != 0) {
|
||||
err("create_cpu failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
select_mux0(struct mux *mux, struct value value, struct mux_input **input)
|
||||
{
|
||||
if (value.type != VALUE_NULL)
|
||||
*input = mux_get_input(mux, MUX0_LABEL); /* label */
|
||||
else
|
||||
*input = mux_get_input(mux, MUX0_SUBSYSTEM); /* subsystem */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
connect_cpu(struct bay *bay, struct openmp_cpu *mcpu)
|
||||
{
|
||||
struct breakdown_cpu *bcpu = &mcpu->breakdown;
|
||||
|
||||
/* Channel aliases */
|
||||
struct chan *subsystem = &mcpu->m.track[CH_SUBSYSTEM].ch;
|
||||
struct chan *label = &mcpu->m.track[CH_LABEL].ch;
|
||||
struct chan *out = &bcpu->out;
|
||||
|
||||
if (mux_init(&bcpu->mux0, bay, label, out, select_mux0, 2) != 0) {
|
||||
err("mux_init failed for mux0");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mux_set_input(&bcpu->mux0, MUX0_LABEL, label) != 0) {
|
||||
err("mux_set_input subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mux_set_input(&bcpu->mux0, MUX0_SUBSYSTEM, subsystem) != 0) {
|
||||
err("mux_set_input label failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_breakdown_connect(struct emu *emu)
|
||||
{
|
||||
if (emu->args.breakdown == 0)
|
||||
return 0;
|
||||
|
||||
struct openmp_emu *memu = EXT(emu, 'P');
|
||||
struct breakdown_emu *bemu = &memu->breakdown;
|
||||
struct bay *bay = &emu->bay;
|
||||
struct system *sys = &emu->system;
|
||||
|
||||
int64_t i = 0;
|
||||
for (struct cpu *cpu = sys->cpus; cpu; cpu = cpu->next) {
|
||||
if (cpu->is_virtual)
|
||||
continue;
|
||||
|
||||
struct openmp_cpu *mcpu = EXT(cpu, 'P');
|
||||
struct breakdown_cpu *bcpu = &mcpu->breakdown;
|
||||
|
||||
/* Connect tri channels and muxes */
|
||||
if (connect_cpu(bay, mcpu) != 0) {
|
||||
err("connect_cpu failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Connect out to sort */
|
||||
if (sort_set_input(&bemu->sort, i, &bcpu->out) != 0) {
|
||||
err("sort_set_input failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Connect out to PRV */
|
||||
struct prv *prv = pvt_get_prv(bemu->pvt);
|
||||
long type = PRV_OPENMP_BREAKDOWN;
|
||||
long flags = PRV_SKIPDUP;
|
||||
|
||||
/* We may emit zero at the start, when an input changes and all
|
||||
* the other sort output channels write a zero in the output,
|
||||
* before the last value is set in prv.c. */
|
||||
flags |= PRV_ZERO;
|
||||
|
||||
struct chan *out = sort_get_output(&bemu->sort, i);
|
||||
if (prv_register(prv, (long) i, type, bay, out, flags)) {
|
||||
err("prv_register failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_breakdown_finish(struct emu *emu,
|
||||
const struct pcf_value_label **labels)
|
||||
{
|
||||
if (emu->args.breakdown == 0)
|
||||
return 0;
|
||||
|
||||
struct openmp_emu *memu = EXT(emu, 'P');
|
||||
struct breakdown_emu *bemu = &memu->breakdown;
|
||||
struct pcf *pcf = pvt_get_pcf(bemu->pvt);
|
||||
long typeid = PRV_OPENMP_BREAKDOWN;
|
||||
char label[] = "CPU: OpenMP Runtime/Label breakdown";
|
||||
struct pcf_type *pcftype = pcf_add_type(pcf, (int) typeid, label);
|
||||
const struct pcf_value_label *v = NULL;
|
||||
|
||||
/* Emit subsystem values */
|
||||
for (v = labels[CH_SUBSYSTEM]; v->label; v++) {
|
||||
if (pcf_add_value(pcftype, v->value, v->label) == NULL) {
|
||||
err("pcf_add_value ss failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit label values */
|
||||
struct system *sys = &emu->system;
|
||||
for (struct proc *p = sys->procs; p; p = p->gnext) {
|
||||
struct openmp_proc *proc = EXT(p, 'P');
|
||||
struct task_info *info = &proc->task_info;
|
||||
if (task_create_pcf_types(pcftype, info->types) != 0) {
|
||||
err("task_create_pcf_types failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Also populate the row labels */
|
||||
struct prf *prf = pvt_get_prf(bemu->pvt);
|
||||
for (int64_t row = 0; row < bemu->nphycpus; row++) {
|
||||
char name[128];
|
||||
if (snprintf(name, 128, "~CPU %4" PRIi64, bemu->nphycpus - row) >= 128) {
|
||||
err("label too long");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (prf_add(prf, (long) row, name) != 0) {
|
||||
err("prf_add failed for %s", name);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,48 +0,0 @@
|
||||
/* Copyright (c) 2024-2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef BREAKDOWN_H
|
||||
#define BREAKDOWN_H
|
||||
|
||||
/*
|
||||
* The breakdown model is implemented on top of the CPU label and subsystem
|
||||
* channels. The mux selects the label when it is not null, and the subsystem otherwise.
|
||||
*
|
||||
* +--------+
|
||||
* | |
|
||||
* | v
|
||||
* | +------+
|
||||
* label ------o-->--| |
|
||||
* | mux0 |-->- out
|
||||
* subsystem ----->--| |
|
||||
* +------+
|
||||
*
|
||||
* mux0 output = label if sel is not null, subsystem otherwise.
|
||||
*
|
||||
* Then the sort module takes the output of each CPU and sorts the values which
|
||||
* are propagated to the PRV directly.
|
||||
*
|
||||
* +------+ +-----+
|
||||
* cpu0.out --->---| |--->---| |
|
||||
* ... | sort | ... | PRV |
|
||||
* cpuN.out --->---| |--->---| |
|
||||
* +------+ +-----+
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "chan.h"
|
||||
#include "mux.h"
|
||||
#include "sort.h"
|
||||
|
||||
struct breakdown_cpu {
|
||||
struct mux mux0;
|
||||
struct chan out;
|
||||
};
|
||||
|
||||
struct breakdown_emu {
|
||||
int64_t nphycpus;
|
||||
struct sort sort;
|
||||
struct pvt *pvt;
|
||||
};
|
||||
|
||||
#endif /* BREAKDOWN_H */
|
||||
@ -8,9 +8,6 @@
|
||||
#include "emu_ev.h"
|
||||
#include "extend.h"
|
||||
#include "model_thread.h"
|
||||
#include "ovni.h"
|
||||
#include "proc.h"
|
||||
#include "task.h"
|
||||
#include "thread.h"
|
||||
#include "value.h"
|
||||
|
||||
@ -98,7 +95,7 @@ static const int fn_table[256][256][3] = {
|
||||
};
|
||||
|
||||
static int
|
||||
simple(struct emu *emu)
|
||||
process_ev(struct emu *emu)
|
||||
{
|
||||
if (!emu->thread->is_running) {
|
||||
err("current thread %d not running", emu->thread->tid);
|
||||
@ -125,266 +122,6 @@ simple(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
create_task(struct emu *emu)
|
||||
{
|
||||
if (emu->ev->payload_size != 8) {
|
||||
err("unexpected payload size");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t taskid = emu->ev->payload->u32[0];
|
||||
uint32_t typeid = emu->ev->payload->u32[1];
|
||||
|
||||
if (taskid == 0) {
|
||||
err("taskid cannot be 0");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (typeid == 0) {
|
||||
err("typeid cannot be 0");
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct openmp_proc *proc = EXT(emu->proc, 'P');
|
||||
struct task_info *info = &proc->task_info;
|
||||
|
||||
/* OpenMP submits inline tasks without pausing the previous
|
||||
* task, so we relax the model to allow this for now. */
|
||||
uint32_t flags = TASK_FLAG_RELAX_NESTING;
|
||||
|
||||
/* https://gitlab.pm.bsc.es/rarias/ovni/-/issues/208 */
|
||||
flags |= TASK_FLAG_RESURRECT;
|
||||
|
||||
if (task_create(info, typeid, taskid, flags) != 0) {
|
||||
err("task_create failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
dbg("task created with taskid %u", taskid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
update_task(struct emu *emu)
|
||||
{
|
||||
if (emu->ev->payload_size < 4) {
|
||||
err("missing task id in payload");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t taskid = emu->ev->payload->u32[0];
|
||||
|
||||
if (taskid == 0) {
|
||||
err("taskid cannot be 0");
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct openmp_thread *th = EXT(emu->thread, 'P');
|
||||
struct openmp_proc *proc = EXT(emu->proc, 'P');
|
||||
|
||||
struct task_info *info = &proc->task_info;
|
||||
struct task_stack *stack = &th->task_stack;
|
||||
|
||||
struct task *task = task_find(info->tasks, taskid);
|
||||
|
||||
if (task == NULL) {
|
||||
err("cannot find task with id %u", taskid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* OpenMP doesn't have parallel tasks */
|
||||
uint32_t body_id = 1;
|
||||
|
||||
if (emu->ev->v == 'x') {
|
||||
if (task_execute(stack, task, body_id) != 0) {
|
||||
err("cannot change task state to running");
|
||||
return -1;
|
||||
}
|
||||
if (chan_push(&th->m.ch[CH_TASKID], value_int64(task->id)) != 0) {
|
||||
err("chan_push taskid failed");
|
||||
return -1;
|
||||
}
|
||||
if (chan_push(&th->m.ch[CH_LABEL], value_int64(task->type->gid)) != 0) {
|
||||
err("chan_push task label failed");
|
||||
return -1;
|
||||
}
|
||||
} else if (emu->ev->v == 'e') {
|
||||
if (task_end(stack, task, body_id) != 0) {
|
||||
err("cannot change task state to end");
|
||||
return -1;
|
||||
}
|
||||
if (chan_pop(&th->m.ch[CH_TASKID], value_int64(task->id)) != 0) {
|
||||
err("chan_pop taskid failed");
|
||||
return -1;
|
||||
}
|
||||
if (chan_pop(&th->m.ch[CH_LABEL], value_int64(task->type->gid)) != 0) {
|
||||
err("chan_pop task label failed");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
err("unexpected task event %c", emu->ev->v);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pre_task(struct emu *emu)
|
||||
{
|
||||
int ret = 0;
|
||||
switch (emu->ev->v) {
|
||||
case 'c':
|
||||
ret = create_task(emu);
|
||||
break;
|
||||
case 'x':
|
||||
case 'e':
|
||||
ret = update_task(emu);
|
||||
break;
|
||||
default:
|
||||
err("unexpected task event value");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
err("cannot update task state");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pre_type(struct emu *emu)
|
||||
{
|
||||
uint8_t value = emu->ev->v;
|
||||
|
||||
if (value != 'c') {
|
||||
err("unexpected event value %c", value);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!emu->ev->is_jumbo) {
|
||||
err("expecting a jumbo event");
|
||||
return -1;
|
||||
}
|
||||
|
||||
const uint8_t *data = &emu->ev->payload->jumbo.data[0];
|
||||
uint32_t typeid;
|
||||
memcpy(&typeid, data, 4); /* May be unaligned */
|
||||
data += 4;
|
||||
|
||||
const char *label = (const char *) data;
|
||||
|
||||
struct openmp_proc *proc = EXT(emu->proc, 'P');
|
||||
struct task_info *info = &proc->task_info;
|
||||
|
||||
/* It will be used for tasks and worksharings. */
|
||||
if (task_type_create(info, typeid, label) != 0) {
|
||||
err("task_type_create failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
update_ws_state(struct emu *emu, uint8_t action)
|
||||
{
|
||||
if (emu->ev->payload_size < 4) {
|
||||
err("missing worksharing id in payload");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t typeid = emu->ev->payload->u32[0];
|
||||
|
||||
if (typeid == 0) {
|
||||
err("worksharing type id cannot be 0");
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct openmp_thread *th = EXT(emu->thread, 'P');
|
||||
struct openmp_proc *proc = EXT(emu->proc, 'P');
|
||||
|
||||
struct task_info *info = &proc->task_info;
|
||||
|
||||
/* Worksharings share the task type */
|
||||
struct task_type *ttype = task_type_find(info->types, typeid);
|
||||
|
||||
if (ttype == NULL) {
|
||||
err("cannot find ws with type %"PRIu32, typeid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (action == 'x') {
|
||||
if (chan_push(&th->m.ch[CH_LABEL], value_int64(ttype->gid)) != 0) {
|
||||
err("chan_push worksharing label failed");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (chan_pop(&th->m.ch[CH_LABEL], value_int64(ttype->gid)) != 0) {
|
||||
err("chan_pop worksharing label failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pre_worksharing(struct emu *emu)
|
||||
{
|
||||
int ret = 0;
|
||||
switch (emu->ev->v) {
|
||||
case 'x':
|
||||
case 'e':
|
||||
ret = update_ws_state(emu, emu->ev->v);
|
||||
break;
|
||||
default:
|
||||
err("unexpected ws event value %c", emu->ev->v);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
err("cannot update worksharing channels");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
process_ev(struct emu *emu)
|
||||
{
|
||||
if (!emu->thread->is_running) {
|
||||
err("current thread %d not running", emu->thread->tid);
|
||||
return -1;
|
||||
}
|
||||
switch (emu->ev->c) {
|
||||
case 'B':
|
||||
case 'I':
|
||||
case 'W':
|
||||
case 'T':
|
||||
case 'A':
|
||||
case 'M':
|
||||
case 'H':
|
||||
case 'C':
|
||||
return simple(emu);
|
||||
case 'P':
|
||||
return pre_task(emu);
|
||||
case 'O':
|
||||
return pre_type(emu);
|
||||
case 'Q':
|
||||
return pre_worksharing(emu);
|
||||
}
|
||||
|
||||
err("unknown event category");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_event(struct emu *emu)
|
||||
{
|
||||
|
||||
@ -1,21 +1,17 @@
|
||||
/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef OPENMP_PRIV_H
|
||||
#define OPENMP_PRIV_H
|
||||
|
||||
#include "emu.h"
|
||||
#include "task.h"
|
||||
#include "model_cpu.h"
|
||||
#include "model_thread.h"
|
||||
#include "breakdown.h"
|
||||
|
||||
/* Private enums */
|
||||
|
||||
enum openmp_chan {
|
||||
CH_SUBSYSTEM = 0,
|
||||
CH_LABEL,
|
||||
CH_TASKID,
|
||||
CH_MAX,
|
||||
};
|
||||
|
||||
@ -59,21 +55,10 @@ enum openmp_function_values {
|
||||
|
||||
struct openmp_thread {
|
||||
struct model_thread m;
|
||||
struct task_stack task_stack;
|
||||
};
|
||||
|
||||
struct openmp_cpu {
|
||||
struct model_cpu m;
|
||||
struct breakdown_cpu breakdown;
|
||||
};
|
||||
|
||||
struct openmp_proc {
|
||||
/* Shared among tasks and ws */
|
||||
struct task_info task_info;
|
||||
};
|
||||
|
||||
struct openmp_emu {
|
||||
struct breakdown_emu breakdown;
|
||||
};
|
||||
|
||||
int model_openmp_probe(struct emu *emu);
|
||||
@ -82,9 +67,4 @@ int model_openmp_connect(struct emu *emu);
|
||||
int model_openmp_event(struct emu *emu);
|
||||
int model_openmp_finish(struct emu *emu);
|
||||
|
||||
int model_openmp_breakdown_create(struct emu *emu);
|
||||
int model_openmp_breakdown_connect(struct emu *emu);
|
||||
int model_openmp_breakdown_finish(struct emu *emu,
|
||||
const struct pcf_value_label **labels);
|
||||
|
||||
#endif /* OPENMP_PRIV_H */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "openmp_priv.h"
|
||||
@ -15,10 +15,8 @@
|
||||
#include "model_cpu.h"
|
||||
#include "model_pvt.h"
|
||||
#include "model_thread.h"
|
||||
#include "proc.h"
|
||||
#include "pv/pcf.h"
|
||||
#include "pv/prv.h"
|
||||
#include "pv/pvt.h"
|
||||
#include "system.h"
|
||||
#include "thread.h"
|
||||
#include "track.h"
|
||||
@ -67,22 +65,12 @@ static struct ev_decl model_evlist[] = {
|
||||
PAIR_B("PCf", "PCF", "fork call")
|
||||
PAIR_B("PCi", "PCI", "initialization")
|
||||
|
||||
/* Task or worksharing type */
|
||||
{ "POc+(u32 typeid, str label)", "creates a type %{typeid} with label \"%{label}\"" },
|
||||
|
||||
{ "PPc(u32 taskid, u32 typeid)", "creates the task %{taskid} with type %{typeid}" },
|
||||
{ "PPx(u32 taskid)", "executes the task %{taskid}" },
|
||||
{ "PPe(u32 taskid)", "ends the task %{taskid}" },
|
||||
|
||||
{ "PQx(u32 typeid)", "begins worksharing with type %{typeid}" },
|
||||
{ "PQe(u32 typeid)", "ends worksharing with type %{typeid}" },
|
||||
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
struct model_spec model_openmp = {
|
||||
.name = model_name,
|
||||
.version = "1.2.1",
|
||||
.version = "1.1.0",
|
||||
.evlist = model_evlist,
|
||||
.model = model_id,
|
||||
.create = model_openmp_create,
|
||||
@ -96,34 +84,24 @@ struct model_spec model_openmp = {
|
||||
|
||||
static const char *chan_name[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = "subsystem",
|
||||
[CH_LABEL] = "label",
|
||||
[CH_TASKID] = "task ID",
|
||||
};
|
||||
|
||||
static const int chan_stack[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = 1,
|
||||
[CH_LABEL] = 1,
|
||||
[CH_TASKID] = 1,
|
||||
};
|
||||
|
||||
static const int chan_dup[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = 1,
|
||||
[CH_LABEL] = 1, /* Two tasks nested with same type */
|
||||
[CH_TASKID] = 1,
|
||||
};
|
||||
|
||||
/* ----------------- pvt ------------------ */
|
||||
|
||||
static const int pvt_type[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = PRV_OPENMP_SUBSYSTEM,
|
||||
[CH_LABEL] = PRV_OPENMP_LABEL,
|
||||
[CH_TASKID] = PRV_OPENMP_TASKID,
|
||||
};
|
||||
|
||||
static const char *pcf_prefix[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = "OpenMP subsystem",
|
||||
[CH_LABEL] = "OpenMP label",
|
||||
[CH_TASKID] = "OpenMP task ID",
|
||||
};
|
||||
|
||||
static const struct pcf_value_label openmp_subsystem_values[] = {
|
||||
@ -171,9 +149,7 @@ static const struct pcf_value_label *pcf_labels[CH_MAX] = {
|
||||
};
|
||||
|
||||
static const long prv_flags[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = PRV_SKIPDUPNULL,
|
||||
[CH_LABEL] = PRV_SKIPDUPNULL,
|
||||
[CH_TASKID] = PRV_SKIPDUPNULL,
|
||||
[CH_SUBSYSTEM] = PRV_EMITDUP,
|
||||
};
|
||||
|
||||
static const struct model_pvt_spec pvt_spec = {
|
||||
@ -187,14 +163,10 @@ static const struct model_pvt_spec pvt_spec = {
|
||||
|
||||
static const int th_track[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = TRACK_TH_ACT,
|
||||
[CH_LABEL] = TRACK_TH_ACT,
|
||||
[CH_TASKID] = TRACK_TH_ACT,
|
||||
};
|
||||
|
||||
static const int cpu_track[CH_MAX] = {
|
||||
[CH_SUBSYSTEM] = TRACK_TH_RUN,
|
||||
[CH_LABEL] = TRACK_TH_RUN,
|
||||
[CH_TASKID] = TRACK_TH_RUN,
|
||||
};
|
||||
|
||||
/* ----------------- chan_spec ------------------ */
|
||||
@ -241,24 +213,9 @@ model_openmp_probe(struct emu *emu)
|
||||
return model_version_probe(&model_openmp, emu);
|
||||
}
|
||||
|
||||
static int
|
||||
init_proc(struct proc *sysproc)
|
||||
{
|
||||
struct openmp_proc *proc = calloc(1, sizeof(struct openmp_proc));
|
||||
if (proc == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
extend_set(&sysproc->ext, model_id, proc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
model_openmp_create(struct emu *emu)
|
||||
{
|
||||
|
||||
if (model_thread_create(emu, &th_spec) != 0) {
|
||||
err("model_thread_init failed");
|
||||
return -1;
|
||||
@ -269,28 +226,6 @@ model_openmp_create(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct system *sys = &emu->system;
|
||||
|
||||
for (struct proc *p = sys->procs; p; p = p->gnext) {
|
||||
if (init_proc(p) != 0) {
|
||||
err("init_proc failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
struct openmp_emu *e = calloc(1, sizeof(struct openmp_emu));
|
||||
if (e == NULL) {
|
||||
err("calloc failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
extend_set(&emu->ext, model_id, e);
|
||||
|
||||
if (model_openmp_breakdown_create(emu) != 0) {
|
||||
err("model_openmp_breakdown_create failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -307,49 +242,6 @@ model_openmp_connect(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (model_openmp_breakdown_connect(emu) != 0) {
|
||||
err("model_openmp_breakdown_connect failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
create_pcf_type(struct system *sys, struct pcf *pcf, long typeid)
|
||||
{
|
||||
struct pcf_type *pcftype = pcf_find_type(pcf, (int) typeid);
|
||||
|
||||
for (struct proc *p = sys->procs; p; p = p->gnext) {
|
||||
struct openmp_proc *proc = EXT(p, model_id);
|
||||
struct task_info *info = &proc->task_info;
|
||||
if (task_create_pcf_types(pcftype, info->types) != 0) {
|
||||
err("task_create_pcf_types failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
finish_pvt(struct emu *emu, const char *name)
|
||||
{
|
||||
struct system *sys = &emu->system;
|
||||
|
||||
/* Emit task types for all channel types and processes */
|
||||
struct pvt *pvt = recorder_find_pvt(&emu->recorder, name);
|
||||
if (pvt == NULL) {
|
||||
err("cannot find pvt with name '%s'", name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct pcf *pcf = pvt_get_pcf(pvt);
|
||||
if (create_pcf_type(sys, pcf, pvt_type[CH_LABEL]) != 0) {
|
||||
err("create_pcf_type failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -386,22 +278,6 @@ end_lint(struct emu *emu)
|
||||
int
|
||||
model_openmp_finish(struct emu *emu)
|
||||
{
|
||||
/* Fill task types */
|
||||
if (finish_pvt(emu, "thread") != 0) {
|
||||
err("finish_pvt thread failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (finish_pvt(emu, "cpu") != 0) {
|
||||
err("finish_pvt cpu failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (model_openmp_breakdown_finish(emu, pcf_labels) != 0) {
|
||||
err("model_openmp_breakdown_finish failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* When running in linter mode perform additional checks */
|
||||
if (emu->args.linter_mode && end_lint(emu) != 0) {
|
||||
err("end_lint failed");
|
||||
|
||||
@ -492,7 +492,7 @@ init_pcf(struct emu *emu, struct pcf *pcf)
|
||||
static int
|
||||
connect_thread(struct emu *emu)
|
||||
{
|
||||
/* Get thread PVT */
|
||||
/* Get cpu PRV */
|
||||
struct pvt *pvt = recorder_find_pvt(&emu->recorder, "thread");
|
||||
if (pvt == NULL) {
|
||||
err("cannot find thread pvt");
|
||||
@ -555,7 +555,7 @@ connect_cpu_prv(struct emu *emu, struct cpu *scpu, struct prv *prv)
|
||||
}
|
||||
|
||||
/* Then connect the output of the tracking module to the prv
|
||||
* trace for the current cpu */
|
||||
* trace for the current thread */
|
||||
struct chan *out = track_get_output(track);
|
||||
long row = (long) scpu->gindex;
|
||||
long flags = PRV_SKIPDUPNULL;
|
||||
@ -589,7 +589,7 @@ connect_cpu(struct emu *emu)
|
||||
}
|
||||
}
|
||||
|
||||
/* Init cpu PCF */
|
||||
/* Init thread PCF */
|
||||
struct pcf *pcf = pvt_get_pcf(pvt);
|
||||
if (init_pcf(emu, pcf) != 0) {
|
||||
err("init_pcf failed");
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
/* This program is a really bad idea. It attempts to sort streams by using a
|
||||
@ -268,7 +268,7 @@ static void
|
||||
write_stream(int fd, void *base, void *dst, const void *src, size_t size)
|
||||
{
|
||||
while (size > 0) {
|
||||
off_t offset = (off_t) ((intptr_t) dst - (intptr_t) base);
|
||||
off_t offset = (off_t) dst - (off_t) base;
|
||||
ssize_t written = pwrite(fd, src, size, offset);
|
||||
|
||||
if (written < 0)
|
||||
|
||||
@ -1,167 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "cfg_file.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "common.h"
|
||||
|
||||
static const char *color_mode[CFG_COLOR_MODE_MAX] = {
|
||||
[CFG_CODE] = "window_in_code_mode",
|
||||
[CFG_GRAD] = "window_in_gradient_mode",
|
||||
[CFG_NGRAD] = "window_in_null_gradient_mode",
|
||||
[CFG_ALTGRAD] = "window_in_alternative_gradient_mode",
|
||||
[CFG_PUNCT] = "window_in_punctual_mode"
|
||||
};
|
||||
|
||||
static const char *semantic_thread[CFG_SEMANTIC_THREAD_MAX] = {
|
||||
[CFG_LAST_EV_VAL] = "Last Evt Val",
|
||||
[CFG_NEXT_EV_VAL] = "Next Evt Val"
|
||||
};
|
||||
|
||||
static int
|
||||
write_cfg(FILE *f, struct cfg_file *cf)
|
||||
{
|
||||
fprintf(f, "#ParaverCFG\n");
|
||||
fprintf(f, "ConfigFile.Version: 3.4\n");
|
||||
fprintf(f, "ConfigFile.NumWindows: 1\n");
|
||||
|
||||
if (cf->desc[0] != '\0') {
|
||||
fprintf(f, "ConfigFile.BeginDescription\n");
|
||||
fprintf(f, "%s\n", cf->desc);
|
||||
fprintf(f, "ConfigFile.EndDescription\n");
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "################################################################################\n");
|
||||
fprintf(f, "< NEW DISPLAYING WINDOW %s >\n", cf->title);
|
||||
fprintf(f, "################################################################################\n");
|
||||
fprintf(f, "window_name %s\n", cf->title);
|
||||
fprintf(f, "window_type single\n");
|
||||
fprintf(f, "window_id 1\n");
|
||||
fprintf(f, "window_position_x 100\n");
|
||||
fprintf(f, "window_position_y 100\n");
|
||||
fprintf(f, "window_width 600\n");
|
||||
fprintf(f, "window_height 150\n");
|
||||
fprintf(f, "window_comm_lines_enabled true\n");
|
||||
fprintf(f, "window_flags_enabled false\n");
|
||||
fprintf(f, "window_noncolor_mode true\n");
|
||||
fprintf(f, "window_color_mode %s\n", color_mode[cf->color_mode]);
|
||||
fprintf(f, "window_logical_filtered true\n");
|
||||
fprintf(f, "window_physical_filtered false\n");
|
||||
fprintf(f, "window_comm_fromto true\n");
|
||||
fprintf(f, "window_comm_tagsize true\n");
|
||||
fprintf(f, "window_comm_typeval true\n");
|
||||
fprintf(f, "window_units Microseconds\n");
|
||||
fprintf(f, "window_maximum_y 1000.0\n");
|
||||
fprintf(f, "window_minimum_y 1.0\n");
|
||||
fprintf(f, "window_compute_y_max true\n");
|
||||
fprintf(f, "window_level thread\n");
|
||||
fprintf(f, "window_scale_relative 1.000000000000\n");
|
||||
fprintf(f, "window_end_time_relative 1.000000000000\n");
|
||||
fprintf(f, "window_object appl { 1, { All } }\n");
|
||||
fprintf(f, "window_begin_time_relative 0.000000000000\n");
|
||||
fprintf(f, "window_open true\n");
|
||||
fprintf(f, "window_drawmode draw_randnotzero\n");
|
||||
fprintf(f, "window_drawmode_rows draw_randnotzero\n");
|
||||
fprintf(f, "window_pixel_size 1\n");
|
||||
fprintf(f, "window_labels_to_draw 1\n");
|
||||
|
||||
/* We only need to change the thread functions for now. Notice it is one
|
||||
* big line split into multiple fprintf calls for readability. */
|
||||
fprintf(f, "window_selected_functions { 14, { ");
|
||||
fprintf(f, "{cpu, Active Thd}, ");
|
||||
fprintf(f, "{appl, Adding}, ");
|
||||
fprintf(f, "{task, Adding}, ");
|
||||
fprintf(f, "{thread, %s}, ", semantic_thread[cf->semantic_thread]);
|
||||
fprintf(f, "{node, Adding}, ");
|
||||
fprintf(f, "{system, Adding}, ");
|
||||
fprintf(f, "{workload, Adding}, ");
|
||||
fprintf(f, "{from_obj, All}, ");
|
||||
fprintf(f, "{to_obj, All}, ");
|
||||
fprintf(f, "{tag_msg, All}, ");
|
||||
fprintf(f, "{size_msg, All}, ");
|
||||
fprintf(f, "{bw_msg, All}, ");
|
||||
fprintf(f, "{evt_type, =}, ");
|
||||
fprintf(f, "{evt_value, All} ");
|
||||
fprintf(f, "} }\n");
|
||||
|
||||
/* Same for the compose functions, but for now no need to change them */
|
||||
fprintf(f, "window_compose_functions { 9, { ");
|
||||
fprintf(f, "{compose_cpu, As Is}, ");
|
||||
fprintf(f, "{compose_appl, As Is}, ");
|
||||
fprintf(f, "{compose_task, As Is}, ");
|
||||
fprintf(f, "{compose_thread, As Is}, ");
|
||||
fprintf(f, "{compose_node, As Is}, ");
|
||||
fprintf(f, "{compose_system, As Is}, ");
|
||||
fprintf(f, "{compose_workload, As Is}, ");
|
||||
fprintf(f, "{topcompose1, As Is}, ");
|
||||
fprintf(f, "{topcompose2, As Is} ");
|
||||
fprintf(f, "} }\n");
|
||||
|
||||
fprintf(f, "window_filter_module evt_type 1 %ld\n", cf->type);
|
||||
fprintf(f, "window_filter_module evt_type_label 1 \"%s\"\n", cf->title);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
cfg_file_init(struct cfg_file *cf, long type, const char *title)
|
||||
{
|
||||
memset(cf, 0, sizeof(struct cfg_file));
|
||||
|
||||
cf->type = type;
|
||||
|
||||
if (snprintf(cf->title, MAX_LABEL, "%s", title) >= MAX_LABEL)
|
||||
warn("cfg title truncated: %s", title);
|
||||
}
|
||||
|
||||
void
|
||||
cfg_file_desc(struct cfg_file *cf, const char *desc)
|
||||
{
|
||||
if (snprintf(cf->desc, MAX_LABEL, "%s", desc) >= MAX_LABEL)
|
||||
warn("cfg description truncated: %s", desc);
|
||||
}
|
||||
|
||||
void
|
||||
cfg_file_color_mode(struct cfg_file *cf, enum cfg_color_mode cm)
|
||||
{
|
||||
cf->color_mode = cm;
|
||||
}
|
||||
|
||||
void
|
||||
cfg_file_semantic_thread(struct cfg_file *cf, enum cfg_semantic_thread st)
|
||||
{
|
||||
cf->semantic_thread = st;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the cfg_file @param cf to the file at @param path.
|
||||
*
|
||||
* It creates the directories leading to @param path if needed. If a file
|
||||
* already exists, it gets overwritten.
|
||||
*/
|
||||
int
|
||||
cfg_file_write(struct cfg_file *cf, const char *path)
|
||||
{
|
||||
if (mkpath(path, 0755, 0) != 0) {
|
||||
err("cannot create path to %s:", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
FILE *f = fopen(path, "w");
|
||||
if (f == NULL) {
|
||||
err("fopen %s failed:", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (write_cfg(f, cf) != 0) {
|
||||
err("writting configuration file %s failed", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,43 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef CFG_FILE_H
|
||||
#define CFG_FILE_H
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define MAX_LABEL 1024
|
||||
|
||||
/** Controls the "Paint As" option */
|
||||
enum cfg_color_mode {
|
||||
CFG_CODE = 0, /* Default */
|
||||
CFG_GRAD,
|
||||
CFG_NGRAD,
|
||||
CFG_ALTGRAD,
|
||||
CFG_PUNCT,
|
||||
CFG_COLOR_MODE_MAX
|
||||
};
|
||||
|
||||
/** Controls how segments are painted from events. */
|
||||
enum cfg_semantic_thread {
|
||||
CFG_LAST_EV_VAL = 0, /* Default */
|
||||
CFG_NEXT_EV_VAL,
|
||||
CFG_SEMANTIC_THREAD_MAX
|
||||
};
|
||||
|
||||
/** Represents a .cfg file on disk. Needed fields to generate the file. */
|
||||
struct cfg_file {
|
||||
char title[MAX_LABEL];
|
||||
char desc[MAX_LABEL];
|
||||
long type;
|
||||
enum cfg_color_mode color_mode;
|
||||
enum cfg_semantic_thread semantic_thread;
|
||||
};
|
||||
|
||||
void cfg_file_init(struct cfg_file *cf, long type, const char *title);
|
||||
void cfg_file_desc(struct cfg_file *cf, const char *desc);
|
||||
void cfg_file_color_mode(struct cfg_file *cf, enum cfg_color_mode cm);
|
||||
void cfg_file_semantic_thread(struct cfg_file *cf, enum cfg_semantic_thread st);
|
||||
USE_RET int cfg_file_write(struct cfg_file *cf, const char *path);
|
||||
|
||||
#endif /* CFG_FILE_H */
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "recorder.h"
|
||||
@ -19,12 +19,6 @@ recorder_init(struct recorder *rec, const char *dir)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* TODO: Use configs per pvt */
|
||||
if (cfg_generate(rec->dir) != 0) {
|
||||
err("cfg_generate failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -87,5 +81,11 @@ recorder_finish(struct recorder *rec)
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Use configs per pvt */
|
||||
if (cfg_generate(rec->dir) != 0) {
|
||||
err("cfg_generate failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -279,6 +279,7 @@ ovni_proc_init(int app, const char *loom, int pid)
|
||||
static int
|
||||
move_thread_to_final(const char *src, const char *dst)
|
||||
{
|
||||
info("moving src=%s to dst=%s", src, dst);
|
||||
char buffer[1024];
|
||||
|
||||
FILE *infile = fopen(src, "r");
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2021-2022 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
set(OVNI_TEST_SOURCE_DIR "${CMAKE_SOURCE_DIR}/test")
|
||||
@ -9,4 +9,3 @@ include(macros.cmake)
|
||||
add_subdirectory(unit)
|
||||
add_subdirectory(emu)
|
||||
add_subdirectory(rt)
|
||||
add_subdirectory(bench)
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
# Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(STATUS "Enabling bench tests")
|
||||
else()
|
||||
message(STATUS "Disabling bench tests as ENABLE_ALL_TEST not set")
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_subdirectory(openmp)
|
||||
@ -1,38 +0,0 @@
|
||||
# Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
if(NOT LIBOMPV_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable libompv bench tests")
|
||||
else()
|
||||
message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling libompv bench tests")
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(NOT NOSV_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "nOS-V not found, cannot enable libompv bench tests")
|
||||
else()
|
||||
message(STATUS "nOS-V not found, disabling libompv bench tests")
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
message(STATUS "Enabling libompv bench tests")
|
||||
|
||||
function(openmp_bench_test)
|
||||
ovni_test(${ARGN} SORT)
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "OMP_OVNI=1")
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=3")
|
||||
set_tests_properties("${OVNI_TEST_NAME}" PROPERTIES TIMEOUT 300)
|
||||
endfunction()
|
||||
|
||||
find_program(B6_HEAT_OMPV_PATH b6_heat_ompv REQUIRED)
|
||||
find_program(B6_HEAT_OMPV_PATH b6_heat_itampi_nodes_tasks REQUIRED)
|
||||
|
||||
openmp_bench_test(dummy.c NAME b6_heat_ompv DRIVER b6_heat_ompv.sh)
|
||||
openmp_bench_test(dummy.c NAME b6_heat_itampi_nodes_tasks DRIVER b6_heat_itampi_nodes_tasks.sh)
|
||||
openmp_bench_test(dummy.c NAME b6_heat_tampi_ompv DRIVER b6_heat_tampi_ompv.sh)
|
||||
@ -1,24 +0,0 @@
|
||||
export NOSV_APPID=1
|
||||
export NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=2
|
||||
#export OMP_NUM_THREADS=1
|
||||
export OMP_OVNI=1
|
||||
|
||||
# FIXME: Disable OFI for now as we don't have a working hfi network
|
||||
export FI_PROVIDER=sockets
|
||||
b6_heat_itampi_nodes_tasks -b 128 -t 10
|
||||
|
||||
ovnisort ovni
|
||||
ovnitop ovni
|
||||
ovniemu -b ovni
|
||||
|
||||
# Make sure the trace is not too big (limit at 128 MiB)
|
||||
maxsize=$((128 * 1024 * 1024))
|
||||
|
||||
for f in ovni/{cpu,thread}.prv; do
|
||||
size=$(stat -c %s $f)
|
||||
if [ $size -lt $maxsize ]; then
|
||||
echo "$f: size ok ($size < $maxsize)"
|
||||
else
|
||||
echo "$f: too big ($size >= $maxsize)"
|
||||
fi
|
||||
done
|
||||
@ -1,22 +0,0 @@
|
||||
export NOSV_APPID=1
|
||||
export NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=2
|
||||
#export OMP_NUM_THREADS=1
|
||||
export OMP_OVNI=1
|
||||
|
||||
b6_heat_ompv -b 128 -t 10
|
||||
|
||||
ovnisort ovni
|
||||
ovnitop ovni
|
||||
ovniemu -b ovni
|
||||
|
||||
# Make sure the trace is not too big (limit at 128 MiB)
|
||||
maxsize=$((128 * 1024 * 1024))
|
||||
|
||||
for f in ovni/{cpu,thread,openmp-breakdown}.prv; do
|
||||
size=$(stat -c %s $f)
|
||||
if [ $size -lt $maxsize ]; then
|
||||
echo "$f: size ok ($size < $maxsize)"
|
||||
else
|
||||
echo "$f: too big ($size >= $maxsize)"
|
||||
fi
|
||||
done
|
||||
@ -1,23 +0,0 @@
|
||||
export NOSV_APPID=1
|
||||
export NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=2
|
||||
export OMP_OVNI=1
|
||||
|
||||
# FIXME: Disable OFI for now as we don't have a working hfi network
|
||||
export FI_PROVIDER=sockets
|
||||
b6_heat_tampi_ompv -b 256 -t 4
|
||||
|
||||
ovnisort ovni
|
||||
ovnitop ovni
|
||||
ovniemu -b ovni
|
||||
|
||||
# Make sure the trace is not too big (limit at 128 MiB)
|
||||
maxsize=$((128 * 1024 * 1024))
|
||||
|
||||
for f in ovni/{cpu,thread,openmp-breakdown}.prv; do
|
||||
size=$(stat -c %s $f)
|
||||
if [ $size -lt $maxsize ]; then
|
||||
echo "$f: size ok ($size < $maxsize)"
|
||||
else
|
||||
echo "$f: too big ($size >= $maxsize)"
|
||||
fi
|
||||
done
|
||||
@ -1 +0,0 @@
|
||||
int main(void) { return 0; }
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2023 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
function(test_emu)
|
||||
@ -13,4 +13,3 @@ add_subdirectory(nosv)
|
||||
add_subdirectory(nanos6)
|
||||
add_subdirectory(tampi)
|
||||
add_subdirectory(mpi)
|
||||
add_subdirectory(openmp)
|
||||
|
||||
@ -6,17 +6,10 @@
|
||||
int first_clock_set = 0;
|
||||
int64_t first_clock; /* First clock */
|
||||
int64_t last_clock; /* Clock from the last event */
|
||||
int64_t next_clock = -1; /* Clock for the next event */
|
||||
|
||||
int64_t get_clock(void)
|
||||
{
|
||||
if (next_clock >= 0) {
|
||||
last_clock = next_clock;
|
||||
next_clock = -1;
|
||||
} else {
|
||||
last_clock = (int64_t) ovni_clock_now();
|
||||
}
|
||||
|
||||
last_clock = (int64_t) ovni_clock_now();
|
||||
if (first_clock_set == 0) {
|
||||
first_clock = last_clock;
|
||||
first_clock_set = 1;
|
||||
@ -25,11 +18,6 @@ int64_t get_clock(void)
|
||||
return last_clock;
|
||||
}
|
||||
|
||||
void set_clock(int64_t t)
|
||||
{
|
||||
next_clock = t;
|
||||
}
|
||||
|
||||
int64_t get_delta(void)
|
||||
{
|
||||
return last_clock - first_clock;
|
||||
|
||||
@ -17,7 +17,6 @@ extern int64_t first_clock;
|
||||
extern int64_t last_clock;
|
||||
|
||||
int64_t get_clock(void);
|
||||
void set_clock(int64_t t);
|
||||
int64_t get_delta(void);
|
||||
|
||||
#define INSTR_0ARG(name, mcv) \
|
||||
@ -117,6 +116,7 @@ instr_start(int rank, int nranks)
|
||||
dbg("thread %d has cpu %d (ncpus=%d)",
|
||||
get_tid(), curcpu, nranks);
|
||||
|
||||
instr_require("ovni");
|
||||
instr_thread_execute(curcpu, -1, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2022-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
test_emu(attach.c)
|
||||
@ -18,12 +18,10 @@ test_emu(require-missing.c
|
||||
NAME flag-enable-all
|
||||
DRIVER "flag-enable-all.driver.sh"
|
||||
REGEX "all .* models are enabled")
|
||||
test_emu(cannot-breakdown.c BREAKDOWN REGEX "WARN: cannot enable breakdown for nOS-V model")
|
||||
test_emu(parallel-tasks.c)
|
||||
test_emu(nest-to-parallel.c)
|
||||
test_emu(mutex.c)
|
||||
test_emu(barrier.c)
|
||||
test_emu(cond.c)
|
||||
|
||||
test_emu(bad-nest-same-task.c SHOULD_FAIL
|
||||
REGEX "body_execute: refusing to run body(id=1,taskid=1) in Paused state, needs to resume intead")
|
||||
@ -38,5 +36,3 @@ test_emu(bad-nest-from-parallel.c SHOULD_FAIL
|
||||
|
||||
test_emu(events-from-outside-cpu.c SHOULD_FAIL
|
||||
REGEX "current thread .* out of CPU")
|
||||
|
||||
test_emu(hwc.c)
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <ovni.h>
|
||||
#include "instr_nosv.h"
|
||||
|
||||
/* Test that we can request the -b flag but still have threads that don't have
|
||||
* the nosv.can_breakdown attribute set to true. OpenMP may enable the breakdown
|
||||
* on its own. */
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
/* Don't enable nosv model via instr_nosv_init() as that would set the
|
||||
* nosv.can_breakdown to 1 */
|
||||
instr_require("nosv");
|
||||
ovni_attr_set_boolean("nosv.can_breakdown", 0);
|
||||
|
||||
/* Emit a nosv event */
|
||||
instr_nosv_type_create(666);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,32 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "instr_nosv.h"
|
||||
|
||||
/* Test the nosv_cond_wait(), nosv_cond_broadcast() and nosv_cond_signal() API
|
||||
* events, introduced in the nOS-V model 2.4.0 */
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
instr_nosv_init();
|
||||
|
||||
instr_nosv_cond_wait_enter();
|
||||
sleep_us(100);
|
||||
instr_nosv_cond_wait_exit();
|
||||
|
||||
instr_nosv_cond_broadcast_enter();
|
||||
sleep_us(100);
|
||||
instr_nosv_cond_broadcast_exit();
|
||||
|
||||
instr_nosv_cond_signal_enter();
|
||||
sleep_us(100);
|
||||
instr_nosv_cond_signal_exit();
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "instr_nosv.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
instr_nosv_init();
|
||||
|
||||
enum hwc { PAPI_TOT_INS = 0, PAPI_TOT_CYC = 1, MAX_HWC };
|
||||
int64_t hwc[MAX_HWC] = { 0 };
|
||||
|
||||
ovni_attr_set_str("nosv.hwc.0.name", "PAPI_TOT_INS");
|
||||
ovni_attr_set_str("nosv.hwc.1.name", "PAPI_TOT_CYC");
|
||||
|
||||
instr_nosv_hwc(MAX_HWC, hwc);
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
sleep_us(100);
|
||||
|
||||
/* Dummy counters */
|
||||
hwc[PAPI_TOT_INS] = 50 + (rand() % 100);
|
||||
hwc[PAPI_TOT_CYC] = 100 + (rand() % 200);
|
||||
|
||||
instr_nosv_hwc(MAX_HWC, hwc);
|
||||
}
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef INSTR_NOSV_H
|
||||
@ -38,18 +38,6 @@ instr_nosv_type_create(int32_t typeid)
|
||||
return task_get_type_gid(p);
|
||||
}
|
||||
|
||||
static inline void
|
||||
instr_nosv_hwc(size_t n, int64_t *counters)
|
||||
{
|
||||
struct ovni_ev ev = {0};
|
||||
|
||||
ovni_ev_set_mcv(&ev, "VWC");
|
||||
ovni_ev_set_clock(&ev, (uint64_t) get_clock());
|
||||
|
||||
uint32_t nbytes = (uint32_t) (n * sizeof(int64_t));
|
||||
ovni_ev_jumbo_emit(&ev, (uint8_t *) counters, (uint32_t) nbytes);
|
||||
}
|
||||
|
||||
INSTR_2ARG(instr_nosv_task_create, "VTc", uint32_t, taskid, uint32_t, typeid)
|
||||
INSTR_2ARG(instr_nosv_task_create_par, "VTC", uint32_t, taskid, uint32_t, typeid)
|
||||
INSTR_2ARG(instr_nosv_task_execute, "VTx", uint32_t, taskid, uint32_t, bodyid)
|
||||
@ -71,12 +59,6 @@ INSTR_0ARG(instr_nosv_mutex_trylock_enter, "VAt")
|
||||
INSTR_0ARG(instr_nosv_mutex_trylock_exit, "VAT")
|
||||
INSTR_0ARG(instr_nosv_mutex_unlock_enter, "VAu")
|
||||
INSTR_0ARG(instr_nosv_mutex_unlock_exit, "VAU")
|
||||
INSTR_0ARG(instr_nosv_cond_wait_enter, "VAo")
|
||||
INSTR_0ARG(instr_nosv_cond_wait_exit, "VAO")
|
||||
INSTR_0ARG(instr_nosv_cond_signal_enter, "VAg")
|
||||
INSTR_0ARG(instr_nosv_cond_signal_exit, "VAG")
|
||||
INSTR_0ARG(instr_nosv_cond_broadcast_enter, "VAk")
|
||||
INSTR_0ARG(instr_nosv_cond_broadcast_exit, "VAK")
|
||||
INSTR_0ARG(instr_nosv_attached, "VHa") /* deprecated */
|
||||
INSTR_0ARG(instr_nosv_detached, "VHA") /* deprecated */
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023-2025 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdio.h>
|
||||
@ -31,8 +31,8 @@ main(void)
|
||||
die("snprintf failed");
|
||||
|
||||
ovni_proc_init(1 + app, loom, getpid());
|
||||
ovni_thread_init(get_tid());
|
||||
ovni_proc_set_rank(rank, nranks);
|
||||
ovni_thread_init(get_tid());
|
||||
|
||||
/* Leader of the segment, must emit CPUs */
|
||||
if (rank % N == 0) {
|
||||
@ -47,6 +47,7 @@ main(void)
|
||||
int nlooms = nranks / N;
|
||||
int lcpu = rank % N;
|
||||
|
||||
instr_require("ovni");
|
||||
instr_nosv_init();
|
||||
instr_thread_execute(lcpu, -1, 0);
|
||||
|
||||
|
||||
@ -1,6 +0,0 @@
|
||||
# Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
test_emu(nested-ws.c)
|
||||
test_emu(nested-task.c)
|
||||
test_emu(mix-task-ws.c)
|
||||
@ -1,47 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef INSTR_OPENMP_H
|
||||
#define INSTR_OPENMP_H
|
||||
|
||||
#include "instr.h"
|
||||
|
||||
#include "task.h"
|
||||
|
||||
static inline void
|
||||
instr_openmp_init(void)
|
||||
{
|
||||
instr_require("openmp");
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
instr_openmp_type_create(uint32_t typeid, const char *label)
|
||||
{
|
||||
struct ovni_ev ev = {0};
|
||||
|
||||
ovni_ev_set_mcv(&ev, "POc");
|
||||
ovni_ev_set_clock(&ev, (uint64_t) get_clock());
|
||||
|
||||
char buf[256];
|
||||
char *p = buf;
|
||||
|
||||
size_t nbytes = 0;
|
||||
memcpy(buf, &typeid, sizeof(typeid));
|
||||
p += sizeof(typeid);
|
||||
nbytes += sizeof(typeid);
|
||||
sprintf(p, "%s.%d", label, typeid);
|
||||
nbytes += strlen(p) + 1;
|
||||
|
||||
ovni_ev_jumbo_emit(&ev, (uint8_t *) buf, (uint32_t) nbytes);
|
||||
|
||||
return task_get_type_gid(p);
|
||||
}
|
||||
|
||||
INSTR_2ARG(instr_openmp_task_create, "PPc", uint32_t, taskid, uint32_t, typeid)
|
||||
INSTR_1ARG(instr_openmp_task_execute, "PPx", uint32_t, taskid)
|
||||
INSTR_1ARG(instr_openmp_task_end, "PPe", uint32_t, taskid)
|
||||
|
||||
INSTR_1ARG(instr_openmp_ws_enter, "PQx", uint32_t, typeid)
|
||||
INSTR_1ARG(instr_openmp_ws_exit, "PQe", uint32_t, typeid)
|
||||
|
||||
#endif /* INSTR_OPENMP_H */
|
||||
@ -1,47 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "instr_openmp.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
instr_openmp_init();
|
||||
|
||||
enum { TASK=1, WS1=2, WS2=3 };
|
||||
instr_openmp_type_create(TASK, "main task");
|
||||
instr_openmp_type_create(WS1, "outer for");
|
||||
instr_openmp_type_create(WS2, "inner for");
|
||||
|
||||
instr_openmp_task_create(1, TASK);
|
||||
instr_openmp_task_execute(1);
|
||||
sleep_us(100);
|
||||
for (int i = 0; i < 3; i++) {
|
||||
instr_openmp_ws_enter(WS1);
|
||||
sleep_us(10);
|
||||
{
|
||||
instr_openmp_ws_enter(WS2);
|
||||
sleep_us(10);
|
||||
instr_openmp_ws_exit(WS2);
|
||||
}
|
||||
sleep_us(10);
|
||||
instr_openmp_ws_exit(WS1);
|
||||
sleep_us(10);
|
||||
}
|
||||
sleep_us(10);
|
||||
instr_openmp_task_end(1);
|
||||
sleep_us(10);
|
||||
/* Another task from the same type */
|
||||
instr_openmp_task_create(2, TASK);
|
||||
instr_openmp_task_execute(2);
|
||||
sleep_us(100);
|
||||
instr_openmp_task_end(2);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,32 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "instr_openmp.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
instr_openmp_init();
|
||||
|
||||
enum { TYPE=1, A=1, B=2 };
|
||||
|
||||
instr_openmp_type_create(TYPE, "task");
|
||||
instr_openmp_task_create(A, TYPE);
|
||||
instr_openmp_task_create(B, TYPE);
|
||||
|
||||
instr_openmp_task_execute(A);
|
||||
sleep_us(100);
|
||||
instr_openmp_task_execute(B);
|
||||
sleep_us(100);
|
||||
instr_openmp_task_end(B);
|
||||
sleep_us(100);
|
||||
instr_openmp_task_end(A);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,33 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "instr_openmp.h"
|
||||
|
||||
#define N 30
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
instr_start(0, 1);
|
||||
instr_openmp_init();
|
||||
|
||||
for (uint32_t type = 1; type <= N; type++)
|
||||
instr_openmp_type_create(type, "ws");
|
||||
|
||||
for (uint32_t type = 1; type <= N; type++) {
|
||||
instr_openmp_ws_enter(type);
|
||||
sleep_us(100);
|
||||
}
|
||||
|
||||
for (uint32_t type = N; type > 0; type--) {
|
||||
instr_openmp_ws_exit(type);
|
||||
sleep_us(100);
|
||||
}
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2022-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
test_emu(flush-overhead.c DISABLED)
|
||||
@ -11,7 +11,6 @@ test_emu(sort-first-and-full-ring.c SORT
|
||||
SHOULD_FAIL REGEX "cannot find a event previous to clock")
|
||||
test_emu(burst-stats.c REGEX "burst stats: median/avg/max = 33/ 33/ 33 ns")
|
||||
test_emu(mp-simple.c MP)
|
||||
test_emu(partial-cpus.c MP)
|
||||
test_emu(merge-cpus-loom.c MP)
|
||||
test_emu(version-good.c)
|
||||
test_emu(version-bad.c SHOULD_FAIL REGEX "incompatible .* version")
|
||||
@ -33,5 +32,3 @@ test_emu(dummy.c NAME "match-doc-events" DRIVER "match-doc-events.sh")
|
||||
test_emu(dummy.c NAME "match-doc-version" DRIVER "match-doc-version.sh")
|
||||
test_emu(libovni-attr.c)
|
||||
test_emu(libovni-mark.c MP)
|
||||
test_emu(split-loom-cpus.c MP)
|
||||
test_emu(duplicated-cpu-index.c MP SHOULD_FAIL REGEX "cpu with index 0 already taken")
|
||||
|
||||
@ -39,6 +39,7 @@ start_delayed(int rank, int nranks)
|
||||
ovni_proc_init(1, rankname, getpid());
|
||||
ovni_thread_init(get_tid());
|
||||
ovni_proc_set_rank(rank, nranks);
|
||||
instr_require("ovni");
|
||||
|
||||
/* All ranks inform CPUs */
|
||||
for (int i = 0; i < nranks; i++)
|
||||
|
||||
@ -1,36 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int rank = atoi(getenv("OVNI_RANK"));
|
||||
int nranks = atoi(getenv("OVNI_NRANKS"));
|
||||
|
||||
if (nranks < 2)
|
||||
die("need at least 2 ranks");
|
||||
|
||||
char hostname[OVNI_MAX_HOSTNAME];
|
||||
|
||||
if (gethostname(hostname, OVNI_MAX_HOSTNAME) != 0)
|
||||
die("gethostname failed");
|
||||
|
||||
ovni_version_check();
|
||||
ovni_proc_init(1, hostname, getpid());
|
||||
ovni_thread_init(get_tid());
|
||||
|
||||
/* Wrongly set the logical index to 0 always */
|
||||
ovni_add_cpu(0, rank);
|
||||
|
||||
instr_thread_execute(rank, -1, 0);
|
||||
|
||||
sleep_us(50);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -31,6 +31,7 @@ start(int rank, int nranks)
|
||||
dbg("thread %d has cpu %d (ncpus=%d)",
|
||||
get_tid(), curcpu, nranks);
|
||||
|
||||
instr_require("ovni");
|
||||
instr_thread_execute(curcpu, -1, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include "common.h"
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
#include "ovni.h"
|
||||
|
||||
/* Ensures that we can emit a partial list of CPUs. */
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int rank = atoi(getenv("OVNI_RANK"));
|
||||
int nranks = atoi(getenv("OVNI_NRANKS"));
|
||||
|
||||
if (nranks < 2)
|
||||
die("needs at least 2 ranks");
|
||||
|
||||
ovni_proc_init(1, "loom0", getpid());
|
||||
ovni_thread_init(get_tid());
|
||||
|
||||
int i = rank;
|
||||
|
||||
/* Only emit one CPU per thread */
|
||||
ovni_add_cpu(i, i);
|
||||
|
||||
instr_thread_execute(i, -1, 0);
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -35,6 +35,7 @@ main(void)
|
||||
for (int i = 0; i < N; i++)
|
||||
ovni_add_cpu(i, cpus[i]);
|
||||
|
||||
instr_require("ovni");
|
||||
instr_thread_execute(-1, -1, 0);
|
||||
|
||||
instr_end();
|
||||
|
||||
@ -36,6 +36,7 @@ main(void)
|
||||
for (int i = 0; i < N; i++)
|
||||
ovni_add_cpu(i, cpus[i]);
|
||||
|
||||
instr_require("ovni");
|
||||
instr_thread_execute(-1, -1, 0);
|
||||
|
||||
instr_end();
|
||||
|
||||
@ -36,11 +36,12 @@ emit(char *mcv, uint64_t clock, int size)
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
set_clock(1);
|
||||
instr_start(0, 1);
|
||||
|
||||
/* Leave some room to prevent clashes */
|
||||
uint64_t t0 = 100;
|
||||
sleep_us(100); /* 100000 us */
|
||||
uint64_t t0 = ovni_clock_now();
|
||||
sleep_us(100); /* 100000 us */
|
||||
|
||||
/* We want it to end like this:
|
||||
*
|
||||
@ -67,7 +68,6 @@ main(void)
|
||||
emit("OB.", t0 + 9, 0);
|
||||
emit("OU]", t0 + 11, 0);
|
||||
|
||||
set_clock(200);
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
|
||||
@ -3,23 +3,21 @@ target=$OVNI_TEST_BIN
|
||||
$target
|
||||
ovnisort ovni
|
||||
ovnidump ovni
|
||||
ovnidump ovni | awk '{print $1,$2}' > found
|
||||
ovnidump ovni | awk 'NR == 1 {next} NR==2{t=$1} {print $1-t,$2} NR==13{exit}' > found
|
||||
|
||||
cat > expected <<EOF
|
||||
1 OHx
|
||||
100 OB.
|
||||
101 OB.
|
||||
102 OB.
|
||||
103 OB.
|
||||
104 OB.
|
||||
105 OB.
|
||||
106 OU[
|
||||
107 OU]
|
||||
108 OB.
|
||||
109 OB.
|
||||
110 OU[
|
||||
111 OU]
|
||||
200 OHe
|
||||
0 OB.
|
||||
1 OB.
|
||||
2 OB.
|
||||
3 OB.
|
||||
4 OB.
|
||||
5 OB.
|
||||
6 OU[
|
||||
7 OU]
|
||||
8 OB.
|
||||
9 OB.
|
||||
10 OU[
|
||||
11 OU]
|
||||
EOF
|
||||
|
||||
diff -s found expected
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
#include "instr.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int rank = atoi(getenv("OVNI_RANK"));
|
||||
int nranks = atoi(getenv("OVNI_NRANKS"));
|
||||
|
||||
if (nranks < 2)
|
||||
die("need at least 2 ranks");
|
||||
|
||||
char hostname[OVNI_MAX_HOSTNAME];
|
||||
|
||||
if (gethostname(hostname, OVNI_MAX_HOSTNAME) != 0)
|
||||
die("gethostname failed");
|
||||
|
||||
ovni_version_check();
|
||||
ovni_proc_init(1, hostname, getpid());
|
||||
ovni_thread_init(get_tid());
|
||||
|
||||
/* Define only one CPU per rank but in reverse order so they are not
|
||||
* processed in increasing index order */
|
||||
int cpu = nranks - rank - 1;
|
||||
ovni_add_cpu(cpu, cpu);
|
||||
|
||||
instr_thread_execute(cpu, -1, 0);
|
||||
|
||||
sleep_us(50);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2023-2024 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -13,6 +13,7 @@ main(void)
|
||||
ovni_thread_init(123);
|
||||
|
||||
ovni_add_cpu(0, 0);
|
||||
instr_require("ovni");
|
||||
|
||||
instr_thread_execute(0, -1, 0);
|
||||
instr_end();
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
set -ex
|
||||
@ -30,24 +30,10 @@ if [ -n "$OVNI_DRIVER" ]; then
|
||||
else
|
||||
if [ "$OVNI_NPROCS" -gt 1 ]; then
|
||||
for i in $(seq 1 "$OVNI_NPROCS"); do
|
||||
rank=$(($i-1))
|
||||
# Run the test in the background
|
||||
(OVNI_RANK=$rank OVNI_NRANKS=$OVNI_NPROCS "$testname" && echo ok > ovni.rc.$rank) &
|
||||
OVNI_RANK=$(($i-1)) OVNI_NRANKS=$OVNI_NPROCS "$testname" &
|
||||
done
|
||||
wait
|
||||
all_good=1
|
||||
for i in $(seq 1 "$OVNI_NPROCS"); do
|
||||
rank=$(($i-1))
|
||||
# Run the test in the background
|
||||
if ! grep -q ok "ovni.rc.$rank"; then
|
||||
echo "Test for rank $rank failed"
|
||||
all_good=0
|
||||
fi
|
||||
done
|
||||
if [ "$all_good" != 1 ]; then
|
||||
echo "Some tests failed"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
"$testname"
|
||||
fi
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
# Copyright (c) 2022-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
find_package(Nanos6)
|
||||
include(CheckOmpSs2Compiler)
|
||||
|
||||
if(NOT OMPSS2_COMPILER_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "Compiler doesn't support -fompss-2 flag, cannot enable Nanos6 RT tests")
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
# Copyright (c) 2022-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
find_package(Nodes)
|
||||
include(CheckOmpSs2Compiler)
|
||||
|
||||
if(NOT OMPSS2_COMPILER_FOUND)
|
||||
@ -64,13 +65,15 @@ nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks-level-4 LEVEL 4 SORT)
|
||||
|
||||
# Same but with breakdown enabled
|
||||
nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks-breakdown-level-1 LEVEL 1 SORT BREAKDOWN
|
||||
REGEX "cannot enable breakdown for nOS-V model")
|
||||
SHOULD_FAIL REGEX "nosv.can_breakdown is false, missing events to enable breakdown")
|
||||
nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks-breakdown-level-2 LEVEL 2 SORT BREAKDOWN
|
||||
REGEX "cannot enable breakdown for nOS-V model")
|
||||
SHOULD_FAIL REGEX "nosv.can_breakdown is false, missing events to enable breakdown")
|
||||
# From level 3 up the breakdown can be enabled
|
||||
nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks-breakdown-level-3 LEVEL 3 SORT BREAKDOWN)
|
||||
nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks-breakdown-level-4 LEVEL 4 SORT BREAKDOWN)
|
||||
|
||||
|
||||
include(CheckPerfParanoid)
|
||||
if (PERF_PARANOID_KERNEL)
|
||||
message(STATUS "Enabling perf paranoid tests for NODES")
|
||||
nodes_rt_test(fibonacci.c SORT LEVEL 3 DRIVER "fibonacci.driver.sh")
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
# Copyright (c) 2021-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2021-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
find_package(Nosv)
|
||||
|
||||
if(NOT NOSV_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "nOS-V not found, cannot enable nOS-V RT tests")
|
||||
@ -10,9 +12,9 @@ if(NOT NOSV_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Needed for nosv_cond
|
||||
if("${NOSV_VERSION}" VERSION_LESS "3.1.0")
|
||||
message(FATAL_ERROR "nOS-V version ${NOSV_VERSION} too old, required at least 3.1.0")
|
||||
# Needed for breakdown and kernel fix
|
||||
if("${NOSV_VERSION}" VERSION_LESS "2.3.1")
|
||||
message(FATAL_ERROR "nOS-V version ${NOSV_VERSION} too old, required at least 2.3.1")
|
||||
endif()
|
||||
|
||||
message(STATUS "Enabling nOS-V RT tests")
|
||||
@ -24,7 +26,7 @@ function(nosv_test)
|
||||
cmake_parse_arguments(
|
||||
NOSV_TEST "${switches}" "${single}" "${multi}" ${ARGN})
|
||||
ovni_test(${ARGN})
|
||||
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE PkgConfig::NOSV m)
|
||||
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE PkgConfig::NOSV)
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND
|
||||
PROPERTY
|
||||
ENVIRONMENT "NOSV_CONFIG=${OVNI_TEST_SOURCE_DIR}/rt/nosv/nosv.toml")
|
||||
@ -42,10 +44,6 @@ nosv_test(parallel-tasks.c SORT)
|
||||
nosv_test(inline.c SORT)
|
||||
nosv_test(mutex.c SORT LEVEL 3)
|
||||
nosv_test(barrier.c SORT LEVEL 3)
|
||||
nosv_test(cond.c SORT LEVEL 3)
|
||||
nosv_test(yield-noisy.c NAME "yield-noisy-l3" DRIVER "yield-noisy-l3.driver.sh")
|
||||
nosv_test(yield-noisy.c NAME "yield-noisy-l4" DRIVER "yield-noisy-l4.driver.sh")
|
||||
nosv_test(resubmit-race.c SORT)
|
||||
|
||||
# Test multiple instrumentation levels
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-level-0 LEVEL 0)
|
||||
@ -56,18 +54,16 @@ nosv_test(several-tasks.c SORT NAME several-tasks-level-4 LEVEL 4)
|
||||
|
||||
# Same but with breakdown enabled
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-breakdown-level-0 LEVEL 0 BREAKDOWN
|
||||
REGEX "cannot enable breakdown for nOS-V model")
|
||||
SHOULD_FAIL REGEX "nosv.can_breakdown is false, missing events to enable breakdown")
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-breakdown-level-1 LEVEL 1 BREAKDOWN
|
||||
REGEX "cannot enable breakdown for nOS-V model")
|
||||
SHOULD_FAIL REGEX "nosv.can_breakdown is false, missing events to enable breakdown")
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-breakdown-level-2 LEVEL 2 BREAKDOWN
|
||||
REGEX "cannot enable breakdown for nOS-V model")
|
||||
SHOULD_FAIL REGEX "nosv.can_breakdown is false, missing events to enable breakdown")
|
||||
# From level 3 up the breakdown can be enabled
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-breakdown-level-3 LEVEL 3 BREAKDOWN)
|
||||
nosv_test(several-tasks.c SORT NAME several-tasks-breakdown-level-4 LEVEL 4 BREAKDOWN)
|
||||
|
||||
nosv_test(several-tasks.c NAME hwc-tasks DRIVER "hwc.driver.sh")
|
||||
nosv_test(hwc-stride.c DRIVER "hwc-stride.driver.sh")
|
||||
|
||||
include(CheckPerfParanoid)
|
||||
if (PERF_PARANOID_KERNEL)
|
||||
message(STATUS "Enabling perf paranoid tests for nOS-V")
|
||||
nosv_test(kernel.c NAME kernel-overflow DRIVER "kernel-overflow.driver.sh")
|
||||
|
||||
@ -1,123 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#define _DEFAULT_SOURCE
|
||||
|
||||
#include <nosv.h>
|
||||
#include <nosv/affinity.h>
|
||||
#include <stdatomic.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "compat.h"
|
||||
|
||||
#define NTASKS 200
|
||||
atomic_int nstarted = 0;
|
||||
atomic_int ncompleted = 0;
|
||||
atomic_int ready = 0;
|
||||
|
||||
nosv_task_t tasks[NTASKS];
|
||||
nosv_cond_t cond;
|
||||
nosv_mutex_t mutex;
|
||||
|
||||
static void
|
||||
task_body(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
atomic_fetch_add(&nstarted, 1);
|
||||
if (nosv_mutex_lock(mutex) != 0)
|
||||
die("nosv_mutex_lock failed");
|
||||
|
||||
while (!ready) {
|
||||
if (nosv_cond_wait(cond, mutex) != 0)
|
||||
die("nosv_cond_wait failed");
|
||||
}
|
||||
|
||||
if (nosv_mutex_unlock(mutex) != 0)
|
||||
die("nosv_mutex_unlock failed");
|
||||
}
|
||||
|
||||
static void
|
||||
task_done(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
atomic_fetch_add(&ncompleted, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
task_broadcast_run(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
if (nosv_mutex_lock(mutex) != 0)
|
||||
die("nosv_mutex_lock failed");
|
||||
|
||||
atomic_store(&ready, 1);
|
||||
if (nosv_cond_broadcast(cond) != 0)
|
||||
die("nosv_cond_broadcast failed");
|
||||
|
||||
if (nosv_mutex_unlock(mutex) != 0)
|
||||
die("nosv_mutex_unlock failed");
|
||||
|
||||
if (nosv_mutex_lock(mutex) != 0)
|
||||
die("nosv_mutex_lock failed");
|
||||
|
||||
if (nosv_cond_signal(cond) != 0)
|
||||
die("nosv_cond_signal failed");
|
||||
|
||||
if (nosv_mutex_unlock(mutex) != 0)
|
||||
die("nosv_mutex_unlock failed");
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
nosv_init();
|
||||
|
||||
nosv_task_type_t task_type;
|
||||
nosv_type_init(&task_type, task_body, NULL, task_done, "task", NULL, NULL, NOSV_TYPE_INIT_NONE);
|
||||
|
||||
nosv_task_t task_broadcast;
|
||||
nosv_task_type_t task_type_broadcast;
|
||||
nosv_type_init(&task_type_broadcast, task_broadcast_run, NULL, NULL, "task_broadcast", NULL, NULL, NOSV_TYPE_INIT_NONE);
|
||||
|
||||
if (nosv_cond_init(&cond, NOSV_COND_NONE) != 0)
|
||||
die("nosv_cond_init failed");
|
||||
|
||||
if (nosv_mutex_init(&mutex, NOSV_MUTEX_NONE) != 0)
|
||||
die("nosv_mutex_init failed");
|
||||
|
||||
for (int i = 0; i < NTASKS; i++)
|
||||
nosv_create(&tasks[i], task_type, 0, NOSV_CREATE_NONE);
|
||||
|
||||
nosv_create(&task_broadcast, task_type_broadcast, 0, NOSV_CREATE_NONE);
|
||||
|
||||
for (int i = 0; i < NTASKS; i++)
|
||||
nosv_submit(tasks[i], NOSV_SUBMIT_NONE);
|
||||
|
||||
while (atomic_load(&nstarted) != NTASKS)
|
||||
nosv_yield(NOSV_YIELD_NONE);
|
||||
|
||||
nosv_submit(task_broadcast, NOSV_SUBMIT_NONE);
|
||||
|
||||
while (atomic_load(&ncompleted) != NTASKS)
|
||||
sleep_us(1000);
|
||||
|
||||
for (int i = 0; i < NTASKS; i++)
|
||||
nosv_destroy(tasks[i], NOSV_DESTROY_NONE);
|
||||
|
||||
nosv_destroy(task_broadcast, NOSV_DESTROY_NONE);
|
||||
|
||||
if (nosv_cond_destroy(cond) != 0)
|
||||
die("nosv_cond_destroy failed");
|
||||
|
||||
if (nosv_mutex_destroy(mutex) != 0)
|
||||
die("nosv_mutex_destroy failed");
|
||||
|
||||
nosv_type_destroy(task_type, NOSV_TYPE_DESTROY_NONE);
|
||||
nosv_type_destroy(task_type_broadcast, NOSV_TYPE_DESTROY_NONE);
|
||||
|
||||
nosv_shutdown();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,117 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
/*
|
||||
* This test creates several tasks that all perform the computation with the
|
||||
* same instructions. However, the access to the memory is done differently. The
|
||||
* first set of tasks use a stride 1, the next 2, the next 4 and so on until
|
||||
* 2^(NSTRIDE-1). This access causes more L3 cache misses, which increases the
|
||||
* execution time, typically directly proportional to the stride number.
|
||||
*
|
||||
* The number of instructions given by PAPI_TOT_INS should remain constant
|
||||
* across tasks, but it is expected that PAPI_L3_TCM increases with the
|
||||
* stride.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <nosv.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "compat.h"
|
||||
|
||||
#define NTASKS 200
|
||||
atomic_int ncompleted = 0;
|
||||
|
||||
nosv_task_t tasks[NTASKS];
|
||||
|
||||
#define NRUNS 2
|
||||
#define NSTRIDE 4
|
||||
#define MAXN (256L * 1024L) /* Adjust this for larger L3 */
|
||||
|
||||
struct meta {
|
||||
long n;
|
||||
long stride;
|
||||
double *vec;
|
||||
};
|
||||
|
||||
static double
|
||||
get_time(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (double) ts.tv_sec + (double) ts.tv_nsec * 1.0e-9;
|
||||
}
|
||||
|
||||
static void
|
||||
task_body(nosv_task_t task)
|
||||
{
|
||||
struct meta *meta = nosv_get_task_metadata(task);
|
||||
|
||||
long stride = meta->stride;
|
||||
|
||||
/* Stride access, some computation */
|
||||
for (long i = 0; i < stride; i++)
|
||||
for (long j = i; j < meta->n; j += stride)
|
||||
meta->vec[j] = sqrt(meta->vec[j]);
|
||||
|
||||
atomic_fetch_add(&ncompleted, 1);
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
nosv_init();
|
||||
|
||||
nosv_task_type_t task_type;
|
||||
nosv_type_init(&task_type, task_body, NULL, NULL, "task", NULL, NULL, 0);
|
||||
|
||||
for (int i = 0; i < NTASKS; i++) {
|
||||
nosv_create(&tasks[i], task_type, sizeof(struct meta), 0);
|
||||
struct meta *meta = nosv_get_task_metadata(tasks[i]);
|
||||
meta->n = MAXN;
|
||||
meta->vec = calloc(MAXN, sizeof(double));
|
||||
for (long i = 0; i < MAXN; i++)
|
||||
meta->vec[i] = (double) i;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%8s %8s %8s\n", "run", "stride", "time");
|
||||
|
||||
/* Repeat for warmup */
|
||||
for (int run = 0; run < NRUNS; run++) {
|
||||
for (int s = 0; s < NSTRIDE; s++) {
|
||||
long stride = 1L << s;
|
||||
|
||||
atomic_store(&ncompleted, 0); /* reset */
|
||||
|
||||
double t0 = get_time();
|
||||
|
||||
for (int i = 0; i < NTASKS; i++) {
|
||||
struct meta *meta = nosv_get_task_metadata(tasks[i]);
|
||||
meta->stride = stride;
|
||||
nosv_submit(tasks[i], 0);
|
||||
}
|
||||
|
||||
while (atomic_load(&ncompleted) != NTASKS)
|
||||
sleep_us(1000);
|
||||
|
||||
double t1 = get_time();
|
||||
|
||||
printf("%8d %8ld %8.3f\n", run, stride, t1 - t0);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < NTASKS; i++) {
|
||||
struct meta *meta = nosv_get_task_metadata(tasks[i]);
|
||||
free(meta->vec);
|
||||
nosv_destroy(tasks[i], 0);
|
||||
}
|
||||
|
||||
nosv_type_destroy(task_type, 0);
|
||||
|
||||
nosv_shutdown();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,18 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
# We will set our own
|
||||
unset NOSV_CONFIG
|
||||
unset NOSV_CONFIG_OVERRIDE
|
||||
|
||||
export NOSV_APPID=1
|
||||
|
||||
cat > nosv.toml << EOF
|
||||
instrumentation.version = "ovni"
|
||||
ovni.level = 2
|
||||
hwcounters.backend = "papi"
|
||||
hwcounters.papi_events = [ "PAPI_TOT_INS", "PAPI_TOT_CYC", "PAPI_L3_TCM" ]
|
||||
EOF
|
||||
|
||||
$target
|
||||
|
||||
ovniemu -l ovni
|
||||
@ -1,18 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
# We will set our own
|
||||
unset NOSV_CONFIG
|
||||
unset NOSV_CONFIG_OVERRIDE
|
||||
|
||||
export NOSV_APPID=1
|
||||
|
||||
cat > nosv.toml << EOF
|
||||
instrumentation.version = "ovni"
|
||||
ovni.level = 2
|
||||
hwcounters.backend = "papi"
|
||||
hwcounters.papi_events = [ "PAPI_TOT_INS", "PAPI_TOT_CYC" ]
|
||||
EOF
|
||||
|
||||
$target
|
||||
|
||||
ovniemu -l ovni
|
||||
@ -1,62 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <nosv.h>
|
||||
#include <nosv/affinity.h>
|
||||
#include <stdatomic.h>
|
||||
#include <unistd.h>
|
||||
#include "compat.h"
|
||||
#include "common.h"
|
||||
|
||||
#define NTASKS 10
|
||||
|
||||
atomic_int nr_completed_tasks;
|
||||
atomic_int completed;
|
||||
|
||||
nosv_task_t child_task;
|
||||
nosv_task_t main_task;
|
||||
|
||||
static void
|
||||
task_run(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
sleep_us(100);
|
||||
}
|
||||
|
||||
static void
|
||||
task_comp(nosv_task_t task)
|
||||
{
|
||||
int total = atomic_fetch_add(&nr_completed_tasks, 1);
|
||||
if (total < NTASKS) {
|
||||
nosv_submit(task, NOSV_SUBMIT_NONE);
|
||||
/* Wait to give the scheduler the chance to win the race */
|
||||
sleep_us(100);
|
||||
} else {
|
||||
nosv_submit(main_task, NOSV_SUBMIT_UNLOCKED);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
nosv_init();
|
||||
|
||||
nosv_attach(&main_task, NULL, NULL, NOSV_ATTACH_NONE);
|
||||
|
||||
nosv_task_type_t task_type;
|
||||
nosv_type_init(&task_type, task_run, NULL, task_comp, "child_task", NULL, NULL, NOSV_TYPE_INIT_NONE);
|
||||
nosv_create(&child_task, task_type, 0, 0);
|
||||
nosv_submit(child_task, NOSV_SUBMIT_NONE);
|
||||
|
||||
nosv_pause(NOSV_PAUSE_NONE);
|
||||
|
||||
nosv_destroy(child_task, NOSV_DESTROY_NONE);
|
||||
|
||||
nosv_detach(NOSV_DETACH_NONE);
|
||||
|
||||
nosv_type_destroy(task_type, NOSV_TYPE_DESTROY_NONE);
|
||||
|
||||
nosv_shutdown();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,30 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
# We will set our own
|
||||
unset NOSV_CONFIG
|
||||
unset NOSV_CONFIG_OVERRIDE
|
||||
|
||||
export NOSV_APPID=1
|
||||
|
||||
cat > nosv.toml << EOF
|
||||
instrumentation.version = "ovni"
|
||||
ovni.level = 3
|
||||
EOF
|
||||
|
||||
$target
|
||||
|
||||
. ./vars.sh
|
||||
|
||||
# We need to sort the trace as it has unsorted regions
|
||||
ovnisort ovni
|
||||
|
||||
# Ensure that we don't find any noisy events by using the limit of 5% of
|
||||
# nosv_yield calls. It is fine if some events don't appear.
|
||||
ovnitop ovni | awk -v n=$nyields \
|
||||
'/^V(AY|Ay|Sh|Sf|S\[|S\]|SN|Sn|Pr|Pp)/ { \
|
||||
if ($2 < 0.05 * n) { print $0, "OK" }
|
||||
else { print $0, "BAD"; bad++ }
|
||||
} END { if (bad != 0) { exit 1 } }'
|
||||
|
||||
# Perform the emulation with breakdown enabled
|
||||
ovniemu -b ovni
|
||||
@ -1,38 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
# We will set our own
|
||||
unset NOSV_CONFIG
|
||||
unset NOSV_CONFIG_OVERRIDE
|
||||
|
||||
export NOSV_APPID=1
|
||||
|
||||
cat > nosv.toml << EOF
|
||||
instrumentation.version = "ovni"
|
||||
ovni.level = 4
|
||||
EOF
|
||||
|
||||
$target
|
||||
|
||||
. ./vars.sh
|
||||
|
||||
# We need to sort the trace as it has unsorted regions
|
||||
ovnisort ovni
|
||||
|
||||
# Ensure we get a lot of VA[yY] and VS[nN] events. We need to make sure that all
|
||||
# events are matched.
|
||||
ovnitop ovni | awk -v n=$nyields \
|
||||
'/^VA[Yy]/ { \
|
||||
/* Match the number or nosv_yield calls exactly */
|
||||
if ($2 == n) { print $0, "OK"; ok++ }
|
||||
else { print $0, "BAD" }
|
||||
} \
|
||||
/^VS[Nn]/ { \
|
||||
/* Use 2% for the non-blocking scheduler events */
|
||||
if ($2 > 0.02 * n) { print $0, "OK"; ok++ }
|
||||
else { print $0, "BAD" }
|
||||
} END {
|
||||
/* Be sure we matched the 4 rules */
|
||||
if (ok != 4) { exit 1 }
|
||||
}'
|
||||
|
||||
# Avoid emulation as may be huge
|
||||
@ -1,89 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <nosv.h>
|
||||
#include <nosv/alpi.h>
|
||||
#include <stdatomic.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "compat.h"
|
||||
|
||||
#define NITERS 1000L
|
||||
|
||||
atomic_int ncompleted = 0;
|
||||
|
||||
static void
|
||||
busywait(long iter)
|
||||
{
|
||||
for (volatile long i = 0; i < iter; i++)
|
||||
;
|
||||
}
|
||||
|
||||
static void
|
||||
task_body(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
|
||||
/* Yield a lot of times to try to generate several events */
|
||||
for (long i = 0; i < NITERS; i++) {
|
||||
nosv_yield(0);
|
||||
busywait(10000L);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
task_done(nosv_task_t task)
|
||||
{
|
||||
UNUSED(task);
|
||||
atomic_fetch_add(&ncompleted, 1);
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
nosv_init();
|
||||
|
||||
uint64_t ncpus;
|
||||
|
||||
if (alpi_cpu_count(&ncpus))
|
||||
die("alpi_cpu_count failed");
|
||||
|
||||
nosv_task_t *tasks = calloc((size_t) ncpus, sizeof(nosv_task_t));
|
||||
if (tasks == NULL)
|
||||
die("calloc failed:");
|
||||
|
||||
int ntasks = (int) ncpus;
|
||||
info("ntasks = %d", ntasks);
|
||||
|
||||
FILE *f = fopen("vars.sh", "w");
|
||||
if (f == NULL)
|
||||
die("fopen failed:");
|
||||
|
||||
fprintf(f, "nyields=%ld\n", (long) ntasks * NITERS);
|
||||
fclose(f);
|
||||
|
||||
nosv_task_type_t task_type;
|
||||
nosv_type_init(&task_type, task_body, NULL, task_done, "task", NULL, NULL, 0);
|
||||
|
||||
for (int i = 0; i < ntasks; i++)
|
||||
nosv_create(&tasks[i], task_type, 0, 0);
|
||||
|
||||
for (int i = 0; i < ntasks; i++)
|
||||
nosv_submit(tasks[i], 0);
|
||||
|
||||
while (atomic_load(&ncompleted) != ntasks)
|
||||
sleep_us(1000);
|
||||
|
||||
for (int i = 0; i < ntasks; i++)
|
||||
nosv_destroy(tasks[i], 0);
|
||||
|
||||
nosv_type_destroy(task_type, 0);
|
||||
|
||||
free(tasks);
|
||||
|
||||
nosv_shutdown();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,37 +1,31 @@
|
||||
# Copyright (c) 2022-2025 Barcelona Supercomputing Center (BSC)
|
||||
# Copyright (c) 2022-2024 Barcelona Supercomputing Center (BSC)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
if(NOT LIBOMPV_FOUND)
|
||||
check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND)
|
||||
check_linker_flag(C "-fopenmp=libompv" OPENMPV_LINKER_FOUND)
|
||||
cmake_path(GET CMAKE_C_COMPILER PARENT_PATH CMAKE_C_COMPILER_PATH)
|
||||
|
||||
if(NOT OPENMPV_COMPILER_FOUND OR NOT OPENMPV_LINKER_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable libompv RT tests")
|
||||
message(FATAL_ERROR "Compiler doesn't support -fopenmp=libompv flag, cannot enable OpenMP-V RT tests")
|
||||
else()
|
||||
message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling libompv RT tests")
|
||||
message(STATUS "Compiler doesn't support -fopenmp=libompv flag, disabling OpenMP-V RT tests")
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(NOT NOSV_FOUND)
|
||||
if(ENABLE_ALL_TESTS)
|
||||
message(FATAL_ERROR "nOS-V not found, cannot enable libompv RT tests")
|
||||
else()
|
||||
message(STATUS "nOS-V not found, disabling libompv RT tests")
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
message(STATUS "Enabling libompv RT tests")
|
||||
|
||||
function(openmp_rt_test)
|
||||
ovni_test(${ARGN} SORT)
|
||||
target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-no-pedantic")
|
||||
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m" Libompv PkgConfig::NOSV)
|
||||
ovni_test(${ARGN})
|
||||
target_compile_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv"
|
||||
"-no-pedantic")
|
||||
target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fopenmp=libompv")
|
||||
target_link_libraries("${OVNI_TEST_NAME}" PRIVATE "m")
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "OMP_OVNI=1")
|
||||
set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY
|
||||
ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni,ovni.level=3")
|
||||
ENVIRONMENT "NOSV_CONFIG_OVERRIDE=instrumentation.version=ovni")
|
||||
endfunction()
|
||||
|
||||
openmp_rt_test(active.c DRIVER active.driver.sh)
|
||||
openmp_rt_test(barrier-explicit.c)
|
||||
openmp_rt_test(critical.c)
|
||||
openmp_rt_test(if0-nested-task.c)
|
||||
@ -41,19 +35,16 @@ openmp_rt_test(parallel-for.c)
|
||||
openmp_rt_test(parallel-loop.c)
|
||||
openmp_rt_test(parallel-nested.c)
|
||||
openmp_rt_test(parallel-task.c)
|
||||
openmp_rt_test(passive.c DRIVER passive.driver.sh)
|
||||
openmp_rt_test(sections.c)
|
||||
openmp_rt_test(simple-task.c)
|
||||
openmp_rt_test(task.c)
|
||||
openmp_rt_test(taskloop.c)
|
||||
openmp_rt_test(taskwait.c)
|
||||
openmp_rt_test(task-untied.c)
|
||||
openmp_rt_test(team-distribute.c)
|
||||
openmp_rt_test(worksharing-and-tasks.c)
|
||||
openmp_rt_test(worksharing-mix.c BREAKDOWN)
|
||||
openmp_rt_test(worksharing-mix.c)
|
||||
openmp_rt_test(worksharing-task.c)
|
||||
openmp_rt_test(worksharing.c)
|
||||
openmp_rt_test(worksharing01.c)
|
||||
openmp_rt_test(worksharing02.c)
|
||||
openmp_rt_test(worksharing03.c)
|
||||
openmp_rt_test(worksharing-active-th.c DRIVER worksharing-active-th.driver.sh)
|
||||
|
||||
@ -1,19 +0,0 @@
|
||||
#include "compat.h"
|
||||
|
||||
/* This test tries to make threads generate as many nosv_pause() calls to try to
|
||||
* flood the trace as possible. */
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single nowait
|
||||
{
|
||||
#pragma omp task
|
||||
{
|
||||
sleep_us(10000);
|
||||
}
|
||||
sleep_us(10000);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,24 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
export NOSV_APPID=1
|
||||
export OMP_OVNI=1
|
||||
export OMP_WAIT_POLICY=active
|
||||
export OMP_NUM_THREADS=4
|
||||
export NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni,ovni.level=3"
|
||||
|
||||
# Repeat several times, as the test is not stable. We only want to be sure that
|
||||
# we never generate too many events.
|
||||
for i in $(seq 10); do
|
||||
rm -rf ovni
|
||||
|
||||
$target
|
||||
|
||||
ovnisort ovni
|
||||
# No need to emulate
|
||||
#ovniemu -l ovni
|
||||
|
||||
# Make sure that we only see a low number of threads being paused
|
||||
ovnitop ovni > top.txt
|
||||
cat top.txt
|
||||
awk -v n=500 '/^OHp/ && $2 > n { printf("too many OHp events: %d > %d", $2, n); exit 1 }' < top.txt
|
||||
done
|
||||
@ -1,20 +0,0 @@
|
||||
#include "compat.h"
|
||||
#include <omp.h>
|
||||
|
||||
/* This test tries to make threads generate as many nosv_pause() calls to try to
|
||||
* flood the trace as possible. This problem should be solved by the new passive
|
||||
* mechanism. */
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
if (omp_get_thread_num() == 1)
|
||||
#pragma omp task
|
||||
{
|
||||
sleep_us(10000);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,23 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
export NOSV_APPID=1
|
||||
export OMP_OVNI=1
|
||||
export OMP_WAIT_POLICY=passive
|
||||
export OMP_NUM_THREADS=4
|
||||
export NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni,ovni.level=2"
|
||||
|
||||
# Repeat several times, as the test is not stable. We only want to be sure that
|
||||
# we never generate too many events.
|
||||
for i in $(seq 10); do
|
||||
rm -rf ovni
|
||||
|
||||
$target
|
||||
|
||||
ovnisort ovni
|
||||
ovniemu -l ovni
|
||||
|
||||
# Make sure that we only see a low number of threads being paused
|
||||
ovnitop ovni > top.txt
|
||||
cat top.txt
|
||||
awk -v n=500 '/^OHp/ && $2 > n { printf("too many OHp events: %d > %d", $2, n); exit 1 }' < top.txt
|
||||
done
|
||||
@ -1,22 +0,0 @@
|
||||
/* Copyright (c) 2025 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
/*
|
||||
* Ensures that we can emit a sequence of PPx, PPe, PPx, PPe events for the same
|
||||
* OpenMP task to execute it again.
|
||||
*
|
||||
* See: https://gitlab.pm.bsc.es/rarias/ovni/-/issues/208
|
||||
*/
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
#pragma omp task untied
|
||||
{
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
#pragma omp taskyield
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -11,7 +11,7 @@ int main(void)
|
||||
#pragma omp taskloop
|
||||
for (int i = 0; i < 10000; i++)
|
||||
{
|
||||
#pragma omp task label("taskloop task")
|
||||
#pragma omp task
|
||||
sleep_us(1);
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <nosv.h>
|
||||
#include "ovni.h"
|
||||
|
||||
/* Ensure that the thread is paused on nosv_waitfor(), so there is a hole in the
|
||||
* OpenMP views, as they track the active thread (in the thread views) and the
|
||||
* running thread (in the CPU views). */
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
ovni_mark_type(0, OVNI_MARK_STACK, "tracker");
|
||||
|
||||
#pragma omp parallel for num_threads(1)
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
ovni_mark_push(0, 123);
|
||||
/* We should see a hole here */
|
||||
nosv_waitfor(10, NULL);
|
||||
ovni_mark_pop(0, 123);
|
||||
}
|
||||
}
|
||||
@ -1,31 +0,0 @@
|
||||
target=$OVNI_TEST_BIN
|
||||
|
||||
export NOSV_APPID=1
|
||||
export OMP_NUM_THREADS=1
|
||||
|
||||
$target
|
||||
|
||||
ovnisort ovni
|
||||
|
||||
ovniemu -l ovni
|
||||
|
||||
# Mark API adds 100 to the type
|
||||
prvtype="100"
|
||||
|
||||
row=$(grep '100:123$' ovni/thread.prv | head -1 | cut -d: -f 5)
|
||||
t0=$(grep '100:123$' ovni/thread.prv | head -1 | cut -d: -f 6)
|
||||
t1=$(grep '100:123$' ovni/thread.prv | tail -1 | cut -d: -f 6)
|
||||
|
||||
PRV_THREAD_STATE=4
|
||||
TH_ST_PAUSED=2
|
||||
|
||||
# 2:0:1:1:1:15113228:100:123
|
||||
count=$(grep "2:0:1:1:$row:.*:$PRV_THREAD_STATE:$TH_ST_PAUSED" ovni/thread.prv |\
|
||||
awk -F: '$6 >= '$t0' && $6 <= '$t1' {n++} END {print n}')
|
||||
|
||||
if [ "$count" != 100 ]; then
|
||||
echo "FAIL: expected 100 pause events"
|
||||
exit 1
|
||||
else
|
||||
echo "OK: found 100 pause events"
|
||||
fi
|
||||
@ -7,7 +7,7 @@ int main(void)
|
||||
{
|
||||
//#pragma omp single nowait
|
||||
for (int i = 0; i < 100; i++) {
|
||||
#pragma omp task label("minitask")
|
||||
#pragma omp task
|
||||
sleep_us(10);
|
||||
}
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ int main(void)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for label("static-for-1")
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
@ -26,7 +26,7 @@ int main(void)
|
||||
{ sleep_us(104); printf("104\n"); }
|
||||
}
|
||||
|
||||
#pragma omp for label("static-for-2")
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
@ -34,7 +34,7 @@ int main(void)
|
||||
#pragma omp single
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
#pragma omp task label("mini-task")
|
||||
#pragma omp task
|
||||
sleep_us(10);
|
||||
}
|
||||
}
|
||||
@ -46,11 +46,11 @@ int main(void)
|
||||
|
||||
#pragma omp barrier
|
||||
|
||||
#pragma omp for label("static-for-3")
|
||||
#pragma omp for
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(1);
|
||||
}
|
||||
#pragma omp for schedule(dynamic, 1) label("dynamic-for")
|
||||
#pragma omp for schedule(dynamic, 1)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sleep_us(i);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user