diff --git a/CMakeLists.txt b/CMakeLists.txt index 393a160..8b8f484 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ cmake_minimum_required(VERSION 3.20) project(OVNI LANGUAGES C) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") + add_compile_options(-Wall -Wextra -Wformat -Wmissing-prototypes -Wstrict-prototypes #-Wconversion -Wsign-conversion diff --git a/cmake/FindNanos6.cmake b/cmake/FindNanos6.cmake new file mode 100644 index 0000000..0a3e6fe --- /dev/null +++ b/cmake/FindNanos6.cmake @@ -0,0 +1,22 @@ +# Locate the Nanos6 library and its nanos6.h header. When both are found, +# the imported target Nanos6::nanos6 is defined. +include(GNUInstallDirs) + +find_library(NANOS6_LIBRARY NAMES nanos6) +find_path(NANOS6_INCLUDE_DIR nanos6.h) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(Nanos6 DEFAULT_MSG + NANOS6_LIBRARY NANOS6_INCLUDE_DIR) + +if(NOT NANOS6_FOUND) + return() +endif() + +if(NOT TARGET Nanos6::nanos6) + add_library(Nanos6::nanos6 SHARED IMPORTED) + set_target_properties(Nanos6::nanos6 PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NANOS6_INCLUDE_DIR}" + IMPORTED_LOCATION "${NANOS6_LIBRARY}") +endif() diff --git a/cmake/FindNodes.cmake b/cmake/FindNodes.cmake new file mode 100644 index 0000000..e17d3d9 --- /dev/null +++ b/cmake/FindNodes.cmake @@ -0,0 +1,41 @@ +# Locate NODES under the prefix given by NODES_HOME. When the library, the +# nanos6.h header and the nanos6-main-wrapper.o object are all found, the +# imported targets Nodes::nodes and Nodes::wrapper are defined. +include(GNUInstallDirs) + +if(DEFINED ENV{NODES_HOME}) + set(NODES_HOME "$ENV{NODES_HOME}") +else() + message(STATUS "NODES_HOME not set, refusing to search") +endif() + +# Search only when a prefix is known: with an empty NODES_HOME the paths +# below would degrade to the system-wide "/lib" and "/include". +if(NODES_HOME) + find_library(NODES_LIBRARY NAMES nanos6 PATHS "${NODES_HOME}/lib" NO_DEFAULT_PATH) + find_file(NODES_WRAPPER NAMES nanos6-main-wrapper.o PATHS "${NODES_HOME}/lib" NO_DEFAULT_PATH) + find_path(NODES_INCLUDE_DIR nanos6.h PATHS "${NODES_HOME}/include" NO_DEFAULT_PATH) +endif() + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(Nodes DEFAULT_MSG + NODES_LIBRARY NODES_INCLUDE_DIR NODES_WRAPPER) + +if(NOT NODES_FOUND) + return() +endif() + +if(NOT TARGET Nodes::nodes) + 
add_library(Nodes::nodes SHARED IMPORTED) + set_target_properties(Nodes::nodes PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NODES_INCLUDE_DIR}" + IMPORTED_LOCATION "${NODES_LIBRARY}") +endif() + +# The wrapper is the object file nanos6-main-wrapper.o, exposed as a STATIC IMPORTED target. +if(NOT TARGET Nodes::wrapper) + add_library(Nodes::wrapper STATIC IMPORTED) + set_target_properties(Nodes::wrapper PROPERTIES + IMPORTED_LOCATION "${NODES_WRAPPER}") +endif() diff --git a/test/macros.cmake b/test/macros.cmake index 09676f9..575bb8d 100644 --- a/test/macros.cmake +++ b/test/macros.cmake @@ -47,7 +47,7 @@ function(ovni_test source) "OVNI_CURRENT_DIR=${CMAKE_CURRENT_BINARY_DIR}") add_executable("${OVNI_TEST_NAME}" "${OVNI_TEST_SOURCE}") - target_link_libraries("${OVNI_TEST_NAME}" ovni) + target_link_libraries("${OVNI_TEST_NAME}" PRIVATE ovni) set(driver "${OVNI_TEST_SOURCE_DIR}/ovni-driver.sh") diff --git a/test/rt/CMakeLists.txt b/test/rt/CMakeLists.txt index 1274cf3..7d11797 100644 --- a/test/rt/CMakeLists.txt +++ b/test/rt/CMakeLists.txt @@ -1,5 +1,6 @@ # Copyright (c) 2022 Barcelona Supercomputing Center (BSC) # SPDX-License-Identifier: GPL-3.0-or-later -add_subdirectory(nosv) add_subdirectory(nanos6) +add_subdirectory(nodes) +add_subdirectory(nosv) diff --git a/test/rt/nanos6/CMakeLists.txt b/test/rt/nanos6/CMakeLists.txt index 35ea67c..81d02cf 100644 --- a/test/rt/nanos6/CMakeLists.txt +++ b/test/rt/nanos6/CMakeLists.txt @@ -1,8 +1,7 @@ # Copyright (c) 2022 Barcelona Supercomputing Center (BSC) # SPDX-License-Identifier: GPL-3.0-or-later -find_library(nanos6 libnanos6) -find_path(NANOS6_INCLUDE_DIR nanos6.h) +find_package(Nanos6) include(CheckCCompilerFlag) check_c_compiler_flag("-fompss-2" HAVE_FOMPSS2_FLAG) @@ -11,8 +10,7 @@ if(NOT HAVE_FOMPSS2_FLAG) return() endif() -check_library_exists(nanos6 nanos6_init "nanos6.h" HAVE_NANOS6) -if(NOT HAVE_NANOS6) +if(NOT NANOS6_FOUND) message(STATUS "Nanos6 not found, disabling Nanos6 RT tests") return() else() @@ -28,9 +26,7 @@ function(nanos6_rt_test) ovni_test(${ARGN}) target_compile_options("${OVNI_TEST_NAME}" 
PUBLIC "-fompss-2") target_link_options("${OVNI_TEST_NAME}" PUBLIC "-fompss-2") - target_link_libraries("${OVNI_TEST_NAME}" nanos6) - target_include_directories("${OVNI_TEST_NAME}" - PUBLIC ${NANOS6_INCLUDE_DIR}) + target_link_libraries("${OVNI_TEST_NAME}" PRIVATE Nanos6::nanos6) set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY ENVIRONMENT "NANOS6_CONFIG=${OVNI_TEST_SOURCE_DIR}/rt/nanos6/nanos6.toml") if(NANOS6_TEST_LEVEL) diff --git a/test/rt/nodes/CMakeLists.txt b/test/rt/nodes/CMakeLists.txt new file mode 100644 index 0000000..1730c8d --- /dev/null +++ b/test/rt/nodes/CMakeLists.txt @@ -0,0 +1,46 @@ +# Copyright (c) 2022 Barcelona Supercomputing Center (BSC) +# SPDX-License-Identifier: GPL-3.0-or-later + +find_package(Nodes) + +include(CheckCCompilerFlag) +check_c_compiler_flag("-fompss-2" HAVE_FOMPSS2_FLAG) +if(NOT HAVE_FOMPSS2_FLAG) + message(STATUS "Compiler doesn't support -fompss-2 flag, disabling NODES RT tests") + return() +endif() + +if(NOT NODES_FOUND) + message(STATUS "NODES not found, disabling NODES RT tests") + return() +else() + message(STATUS "Enabling NODES RT tests") +endif() + +# nodes_rt_test(<source> [<ovni_test args>...]) +# +# Registers a test through ovni_test (all arguments are forwarded), builds it +# with -fompss-2 -fdo-not-use-nanos6, links it against Nodes::nodes and +# Nodes::wrapper, and sets NANOS6_CONFIG to rt/nodes/nanos6.toml for the run. +function(nodes_rt_test) + ovni_test(${ARGN}) + + target_compile_options("${OVNI_TEST_NAME}" PUBLIC + "-fompss-2" "-fdo-not-use-nanos6") + + # LINKER: hands each option to the linker as one unit, so the "-z lazy" and + # "-rpath <dir>" pairs cannot be torn apart by option de-duplication. + target_link_options("${OVNI_TEST_NAME}" PUBLIC + "-fompss-2" "-fdo-not-use-nanos6" "LINKER:-z,lazy" + "LINKER:-rpath,${NODES_HOME}/lib") + + target_link_libraries("${OVNI_TEST_NAME}" PRIVATE Nodes::nodes Nodes::wrapper) + set_property(TEST "${OVNI_TEST_NAME}" APPEND PROPERTY + ENVIRONMENT "NANOS6_CONFIG=${OVNI_TEST_SOURCE_DIR}/rt/nodes/nanos6.toml") +endfunction() + +nodes_rt_test(../nanos6/simple-task.c NAME simple-task) +nodes_rt_test(../nanos6/nested-task.c NAME nested-task) +nodes_rt_test(../nanos6/several-tasks.c NAME several-tasks) +nodes_rt_test(../nanos6/if0.c NAME if0) +nodes_rt_test(../nanos6/sched-add.c NAME sched-add) diff --git a/test/rt/nodes/nanos6.toml b/test/rt/nodes/nanos6.toml new file mode 100644 index 0000000..1f7756b 
--- /dev/null +++ b/test/rt/nodes/nanos6.toml @@ -0,0 +1,229 @@ +[version] + # Choose whether the runtime runs with debug symbols and internal asserts. Enabling this option + # may produce significant overheads, so production or performance executions should disable this + # option. Default is false + debug = false + # Choose the dependency system implementation. Default is "discrete" + # Possible values: "discrete", "regions" + dependencies = "discrete" + # Choose the instrumentation variant to run. Default is "none" + # Possible values: "none", "ctf", "extrae", "graph", "lint", "ovni", "stats", "verbose" + instrument = "ovni" + +[turbo] + # Choose whether enabling floating-point unit optimizations in all Nanos6 threads. Enabling this + # option can speedup the floating-point computations in user tasks, but may produce imprecise + # results. It enables flush-to-zero (FZ) and denormals are zero (DAZ) optimizations in Intel® + # processors. Default is false + enabled = false + +[scheduler] + # Choose the task scheduling policy. Default is "fifo" + # Possible values: "fifo", "lifo" + policy = "fifo" + # Enable the immediate successor feature to improve cache data reutilization between successor + # tasks. If enabled, when a CPU finishes a task it starts executing the successor task (computed + # through their data dependencies). Default is true + immediate_successor = true + # Indicate whether the scheduler should consider task priorities defined by the user in the + # task's priority clause. Default is true + priority = true + +[cpumanager] + # The underlying policy of the CPU manager for the handling of CPUs. Default is "default", which + # corresponds to "hybrid" + # Possible values: "default", "idle", "busy", "hybrid", "lewi", "greedy" + policy = "default" + # The maximum number of iterations to busy wait for before idling. Default is "240000". Only + # works for the 'hybrid' policy. 
This number will be divided by the number of active CPUs to + # obtain a "busy_iters per CPU" metric for each individual CPU to busy-wait for + busy_iters = 240000 + +[taskfor] + # Choose the total number of CPU groups that will execute the worksharing tasks (taskfors). Default + # is none (not set), which means that the runtime will create one taskfor group per NUMA node + # groups = 1 + # Indicate whether should print the taskfor groups information + report = false + +[throttle] + # Enable throttle to stop creating tasks when certain conditions are met. Default is false + enabled = false + # Maximum number of child tasks that can be created before throttling. Default is 5000000 + tasks = 5000000 + # Maximum memory pressure (percent of max_memory) before throttling. Default is 70 (%) + pressure = 70 # % + # Maximum memory that can be used by the runtime. Default is "0", which equals half of system memory + max_memory = "0" + # Evaluation interval (us). Each time this amount of time is elapsed, the throttle system queries + # the memory allocator statistics and evaluates the current memory pressure. A higher interval + # results in less accurate pressure estimation, but a lower interval introduces noticeable overhead, + # as reading memory statistics requires a global lock on the allocator to aggregate per-thread + # statistics. Default is 1000 + polling_period_us = 1000 + +[numa] + # Enable NUMA tracking of task data. NUMA tracking consists of annotating the NUMA location + # of data to be later scheduled based on this information. When using "auto" this feature is + # enabled in the first allocation done using the Nanos6 NUMA API. 
Default is "auto" + # Possible values: "auto", "on", "off" + tracking = "auto" + # Indicate whether should print the NUMA bitmask of each NUMA wildcards + report = false + # Use the page auto-discovery mechanism to detect the system's page size + # Default is true, which is useful in systems with THP enabled + # Set to false will use the default page size, which is arch-dependent + discover_pagesize = true + +[hardware_counters] + # Enable the verbose mode of hardware counters, printing a small summary of metrics at the + # end of the execution. Default is false + verbose = false + # The verbose file's name. Default is "nanos6-output-hwcounters.txt" + verbose_file = "nanos6-output-hwcounters.txt" + [hardware_counters.papi] + # Enable the PAPI backend of the hardware counters module. Default is false + enabled = false + # The list of PAPI counters to read. Default is "PAPI_TOT_INS" and "PAPI_TOT_CYC" + counters = [ + "PAPI_TOT_INS", + "PAPI_TOT_CYC" + ] + [hardware_counters.rapl] + # Enable the RAPL backend of the hardware counters module for runtime-wise energy + # metrics. Default is false + enabled = false + +[monitoring] + # Indicate whether enabling the Monitoring of tasks and CPUs, which allows the usage of + # prediction-based policies. Disabled by default + enabled = false + # Indicate whether enabling the "Wisdom" mechanism of Monitoring, saving normalized metrics for + # future executions and loading previously saved metrics when the runtime initializes. Default + # is false + wisdom = false + # Enable the verbose mode of Monitoring, which prints a detailed summary of task type metrics + # at the end of the execution. Default is true + verbose = true + # The verbose file's name. Default is "output-monitoring.txt" + verbose_file = "output-monitoring.txt" + # The prediction rate at which CPU usage predictions are inferred. 
Default is once every 100µs + cpuusage_prediction_rate = 100 # µs + # The number of samples (window) of the normalized exponential moving average for predictions + # Default is 20 + rolling_window = 20 + +[devices] + +[instrument] + [instrument.ctf] + # Choose the temporary directory where to store intermediate CTF files. Default is none + # (not set), which means that $TMPDIR will be used if present, or /tmp otherwise + # tmpdir = "/tmp" + [instrument.ctf.converter] + # Indicate whether the trace converter should automatically generate the trace after + # executing a program with CTF instrumentation. Default is true + enabled = true + # Use the fast converter. This feature is experimental and generates a trace compatible + # with just a subset of Paraver cfgs. Default is false + fast = false + # Indicate the location of the ctf2prv converter script. Default is none (not set), + # which means that the $CTF2PRV will be used if present, or ctf2prv in $PATH + # otherwise + # location = "path/to/ctf2prv" + # Choose the events that will be traced + [instrument.ctf.events] + # Linux Kernel events options. Nanos6 can collect Linux kernel internal events using the + # perf_event_open system call. This requires to set /proc/sys/kernel/perf_event_paranoid + # to -1 and read permissions for either /sys/kernel/tracing or /sys/kernel/debug/tracing. + # Events can be selected individually and/or using Nanos6-defined presets + [instrument.ctf.events.kernel] + # Select one or more predefined sets of events. Available options are: + # - preemption: Trace context switches and interrupts + # - context_switch: Trace context switches + # - syscall: Trace all kernel syscalls entry and exit + # presets = [ + # "preemption" + # ] + # Provide a file with a list of individual events to enable, one event per line. + # Lines starting with "#" are omitted. + # file = "./nanos6_kernel_events" + # Exclude selected events. Disable events even if enabled in a preset or the + # user provided file. 
+ # exclude = [ + # "sys_enter_gettimeofday", + # "sys_exit_gettimeofday" + # ] + [instrument.ovni] + # Level of detail. Selects what should be instrumented: + # 1 = Tasks + # 2 = Tasks + simple subsystem (the default) + # 3 = Tasks + full subsystem + memory (huge performance penalty) + level = 2 + [instrument.extrae] + # Indicate whether the trace should show the activity of the threads instead of the CPUs' + # activity. Default is false, which shows the activity of CPUs + as_threads = false + # Choose the detail level of the information generated in extrae traces. Default is 1 + detail_level = 1 + [instrument.graph] + # Indicate whether the resulting PDF should be opened automatically with the display + # command. Default is false + display = false + # Choose the command to be used to automatically display the resulting PDF + display_command = "xdg-open" + # Indicate whether the graph nodes should remove the directories from the source code + # file names. Default is false + shorten_filenames = false + # Indicate whether should show the internal data structures that determine when tasks + # are ready. Default is false + show_dependency_structures = false + # Include the information about the range of data or region that is covered when showing + # internal data structures. Default is false + show_regions = false + # Indicate whether should show the internal data structures that do not determine dependencies + # or that are redundant by transitivity. Default is false + show_spurious_dependency_structures = false + # Choose whether should force future and previous dependencies to be shown with different + # graphical attributes. Default is false + show_dead_dependencies = false + # Include the internal data structures after they are no longer relevant. Default is false + show_dead_dependency_structures = false + # Instead of trying to collapse in one step as many related changes as possible, show one + # at a time. 
Default is false + show_all_steps = false + # Show superaccess links. Default is true + show_superaccess_links = true + # Choose whether to emit a table next to the graph with a description of the changes in + # each frame. Default is false + show_log = false + [instrument.stats] + # The outfile file for emitting the statistics. Default is the standard error + output_file = "/dev/stderr" + [instrument.verbose] + # Output device or file for verbose log. Default is "/dev/stderr" + output_file = "/dev/stderr" + # Print timestamps on log. Default is true + timestamps = true + # Delay verbose output to prevent mixing with application output. Default is false + dump_only_on_exit = false + # Verbose log concepts to display. Possible values on README.md + areas = ["all", "!ComputePlaceManagement", "!DependenciesByAccess", "!DependenciesByAccessLinks", + "!DependenciesByGroup", "!LeaderThread", "!TaskStatus", "!ThreadManagement"] + +[memory] + +[misc] + # Stack size of threads created by the runtime. Default is 8M + stack_size = "8M" + +[loader] + # Enable verbose output of the loader, to debug dynamic linking problems. Default is false + verbose = false + # Choose whether to print a warning when there is any invalid NANOS6 environment variables + # defined when running a program. Default is true + warn_envars = true + # Path to the nanos6 libraries. Default is none + # library_path = "path/to/nanos6/lib" + # Prefix for the report printing all the runtime variables. Default is none + # report_prefix = "#" diff --git a/test/rt/nosv/CMakeLists.txt b/test/rt/nosv/CMakeLists.txt index e8c5707..ba660e0 100644 --- a/test/rt/nosv/CMakeLists.txt +++ b/test/rt/nosv/CMakeLists.txt @@ -15,7 +15,7 @@ endif() function(nosv_test) ovni_test(${ARGN}) - target_link_libraries("${OVNI_TEST_NAME}" nosv) + target_link_libraries("${OVNI_TEST_NAME}" PRIVATE nosv) target_include_directories("${OVNI_TEST_NAME}" PUBLIC ${NOSV_INCLUDE_DIR}) set_property(TEST "${OVNI_TEST_NAME}" APPEND