From dd04c180a70064ce40504ace5bb7ef8be7624590 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Mon, 22 May 2023 18:25:19 +0200 Subject: [PATCH] Reorganize sources --- .gitignore | 3 + CMakeLists.txt | 20 +- src/CMakeLists.txt | 2 + src/bench6/CMakeLists.txt | 15 +- src/bench6/bench6.c | 35 +++ src/bench6/bench6.h | 9 +- src/bench6/common.c | 73 +++++-- src/bench6/common.h | 46 ++++ src/heat/CMakeLists.txt | 1 + src/heat/common/heat.h | 7 +- src/heat/common/main.c | 52 +++++ src/heat/common/misc.c | 22 +- src/heat/mpi/CMakeLists.txt | 46 ++-- src/heat/mpi/solver_mpi.c | 5 + src/heat/mpi/solver_mpi_nbuffer.c | 7 + src/heat/mpi/solver_mpi_ompss2_forkjoin.c | 6 + src/heat/mpi/solver_mpi_ompss2_tasks.c | 8 +- src/heat/smp/CMakeLists.txt | 43 ++-- src/heat/smp/main.c | 13 +- src/heat/smp/solver_ompss2.c | 5 + src/heat/smp/solver_ompss2_residual.c | 6 + src/heat/smp/solver_ompss2_taskloop.c | 5 + src/heat/smp/solver_seq.c | 5 + src/ompss2/CMakeLists.txt | 12 ++ README.md => src/ompss2/README.md | 4 +- .../ompss2/examples}/readywave-instr.csv.png | Bin .../ompss2/examples}/register_deps.csv.png | Bin .../ompss2/examples}/sched_add.csv.png | Bin .../ompss2/examples}/sched_get.csv.png | Bin src/{bench6 => ompss2}/readywave.c | 0 src/{bench6 => ompss2}/register_deps.c | 0 src/{bench6 => ompss2}/sched_add.c | 0 src/{bench6 => ompss2}/sched_get.c | 0 src/tools/CMakeLists.txt | 7 + src/tools/config.in.h | 11 + src/tools/runner.c | 199 ++++++++++++++++++ 36 files changed, 567 insertions(+), 100 deletions(-) create mode 100644 src/bench6/bench6.c create mode 100644 src/bench6/common.h create mode 100644 src/heat/common/main.c create mode 100644 src/ompss2/CMakeLists.txt rename README.md => src/ompss2/README.md (75%) rename {examples => src/ompss2/examples}/readywave-instr.csv.png (100%) rename {examples => src/ompss2/examples}/register_deps.csv.png (100%) rename {examples => src/ompss2/examples}/sched_add.csv.png (100%) rename {examples => src/ompss2/examples}/sched_get.csv.png 
(100%) rename src/{bench6 => ompss2}/readywave.c (100%) rename src/{bench6 => ompss2}/register_deps.c (100%) rename src/{bench6 => ompss2}/sched_add.c (100%) rename src/{bench6 => ompss2}/sched_get.c (100%) create mode 100644 src/tools/CMakeLists.txt create mode 100644 src/tools/config.in.h create mode 100644 src/tools/runner.c diff --git a/.gitignore b/.gitignore index 1269488..2b00aeb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ data +build/ +install/ +tags diff --git a/CMakeLists.txt b/CMakeLists.txt index 22b3b7c..e53e51c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,9 +18,6 @@ add_compile_options( -Werror ) -set(CMAKE_C_COMPILER "clang") -set(CMAKE_CXX_COMPILER "clang++") - set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_C_STANDARD 11) @@ -76,6 +73,23 @@ find_package(MPI) find_package(Nanos6) find_package(Nodes) +set_property(GLOBAL PROPERTY bench6_list "") + +macro(mk_bench NAME) + if(NOT "${NAME}" MATCHES "b6_.*") + message(FATAL_ERROR "benchmark name must begin with b6_: ${NAME}") + endif() + add_executable(${NAME}) + + get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list) + message(STATUS "Before BENCH6_LIST=${BENCH6_LIST}") + list(APPEND BENCH6_LIST ${NAME}) + message(STATUS "After BENCH6_LIST=${BENCH6_LIST}") + set_property(GLOBAL PROPERTY bench6_list "${BENCH6_LIST}") + + install(TARGETS ${NAME} RUNTIME DESTINATION bin) +endmacro() + add_subdirectory(src) include(FeatureSummary) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bd27e48..b7370b2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,2 +1,4 @@ add_subdirectory(bench6) +add_subdirectory(ompss2) add_subdirectory(heat) +add_subdirectory(tools) diff --git a/src/bench6/CMakeLists.txt b/src/bench6/CMakeLists.txt index 4b252fa..c940d0d 100644 --- a/src/bench6/CMakeLists.txt +++ b/src/bench6/CMakeLists.txt @@ -1,13 +1,2 @@ -add_library(bench6_common STATIC common.c) - -macro(mk_bench6 NAME SOURCE) - add_executable(${NAME} ${SOURCE}) - 
target_link_libraries(${NAME} PRIVATE bench6_common Nanos6::wrapper) - install(TARGETS ${NAME} RUNTIME DESTINATION bin) -endmacro() - -# FIXME: broken in last clang -#mk_bench6(bench6_register_deps register_deps.c) -mk_bench6(bench6_sched_add sched_add.c) -mk_bench6(bench6_sched_get sched_get.c) -mk_bench6(bench6_readywave readywave.c) +add_library(bench6_lib STATIC bench6.c common.c) +target_include_directories(bench6_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/bench6/bench6.c b/src/bench6/bench6.c new file mode 100644 index 0000000..325d59f --- /dev/null +++ b/src/bench6/bench6.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "bench6.h" + +//#include +#include +#include +#include + +/* Returns the current time in seconds since some point in the past */ +double bench6_time(void) +{ + struct timespec tv; + if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0) + { + perror("clock_gettime failed"); + exit(EXIT_FAILURE); + } + + return (double)(tv.tv_sec) + + (double)tv.tv_nsec * 1.0e-9; +} + +int get_ncpus(void) +{ + return -1; + //return (int) nanos6_get_num_cpus(); +} + +void +bench6_report(double time) +{ + printf("time %e\n", time); +} diff --git a/src/bench6/bench6.h b/src/bench6/bench6.h index a8ea499..5fdc0db 100644 --- a/src/bench6/bench6.h +++ b/src/bench6/bench6.h @@ -4,14 +4,9 @@ #ifndef BENCH6_H #define BENCH6_H -#define UNUSED(x) (void)(x) +double bench6_time(void); +void bench6_report(double time); -double get_time(void); int get_ncpus(void); -int bench6_creator(int argc, char *argv[]); -int bench6_sched_get(int argc, char *argv[]); -int bench6_sched_add(int argc, char *argv[]); -int bench6_register_deps(int argc, char *argv[]); - #endif /* BENCH6_H */ diff --git a/src/bench6/common.c b/src/bench6/common.c index 050e399..31b44ee 100644 --- a/src/bench6/common.c +++ b/src/bench6/common.c @@ -1,28 +1,69 @@ -/* Copyright (c) 2022 Barcelona Supercomputing Center 
(BSC) +/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC) * SPDX-License-Identifier: GPL-3.0-or-later */ -#include "bench6.h" +#include "common.h" -#include -#include #include +#include +#include +#include #include -/* Returns the current time in seconds since some point in the past */ -double get_time(void) +char *progname = NULL; +int is_debug_enabled = 0; + +void +progname_set(char *name) { - struct timespec tv; - if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0) - { - perror("clock_gettime failed"); - exit(EXIT_FAILURE); + progname = name; +} + +void +enable_debug(void) +{ + is_debug_enabled = 1; +} + +static void +vaerr(const char *prefix, const char *func, const char *errstr, va_list ap) +{ + if (progname != NULL) + fprintf(stderr, "%s: ", progname); + + if (prefix != NULL) + fprintf(stderr, "%s: ", prefix); + + if (func != NULL) + fprintf(stderr, "%s: ", func); + + vfprintf(stderr, errstr, ap); + + int len = strlen(errstr); + + if (len > 0) { + char last = errstr[len - 1]; + if (last == ':') + fprintf(stderr, " %s\n", strerror(errno)); + else if (last != '\n' && last != '\r') + fprintf(stderr, "\n"); } - - return (double)(tv.tv_sec) + - (double)tv.tv_nsec * 1.0e-9; } -int get_ncpus(void) +void +verr(const char *prefix, const char *func, const char *errstr, ...) { - return (int) nanos6_get_num_cpus(); + va_list ap; + va_start(ap, errstr); + vaerr(prefix, func, errstr, ap); + va_end(ap); +} + +void +vdie(const char *prefix, const char *func, const char *errstr, ...) 
+{ + va_list ap; + va_start(ap, errstr); + vaerr(prefix, func, errstr, ap); + va_end(ap); + abort(); } diff --git a/src/bench6/common.h b/src/bench6/common.h new file mode 100644 index 0000000..d76457a --- /dev/null +++ b/src/bench6/common.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef COMMON_H +#define COMMON_H + +#include + +extern int is_debug_enabled; + +/* Debug macros */ + +void progname_set(char *name); +void enable_debug(void); +void verr(const char *prefix, const char *func, const char *errstr, ...); +void vdie(const char *prefix, const char *func, const char *errstr, ...); + +/* clang-format off */ + +#define rerr(...) fprintf(stderr, __VA_ARGS__) +#define err(...) verr("ERROR", __func__, __VA_ARGS__) +#define die(...) vdie("FATAL", __func__, __VA_ARGS__) +#define info(...) verr("INFO", NULL, __VA_ARGS__) +#define finfo(...) verr("INFO", __func__, __VA_ARGS__) +#define warn(...) verr("WARN", NULL, __VA_ARGS__) + +#define dbg(...) 
do { \ + if (unlikely(is_debug_enabled)) verr("DEBUG", __func__, __VA_ARGS__); \ +} while (0); + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define UNUSED(x) (void)(x) + +/* Poison assert */ +#pragma GCC poison assert + +#define USE_RET __attribute__((warn_unused_result)) + +#define ARRAYLEN(x) (sizeof(x)/sizeof((x)[0])) + +/* clang-format on */ + + + +#endif /* COMMON_H */ diff --git a/src/heat/CMakeLists.txt b/src/heat/CMakeLists.txt index e5b2578..a115a7b 100644 --- a/src/heat/CMakeLists.txt +++ b/src/heat/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(heat_common STATIC common/misc.c common/kernel.c) target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(heat_common PUBLIC m) add_subdirectory(smp) add_subdirectory(mpi) diff --git a/src/heat/common/heat.h b/src/heat/common/heat.h index d8ee05e..5e5af35 100644 --- a/src/heat/common/heat.h +++ b/src/heat/common/heat.h @@ -3,11 +3,11 @@ #include #include +#include #define IGNORE_RESIDUAL ((double) -1.0) #define DEFAULT_DELTA ((double) 0.00005) #define DEFAULT_BS 1024 -#define MAX_STRING_SIZE 100 #define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b)) @@ -30,8 +30,8 @@ typedef struct { double *matrix; int numHeatSources; HeatSource *heatSources; - char confFileName[MAX_STRING_SIZE]; - char imageFileName[MAX_STRING_SIZE]; + char confFileName[PATH_MAX]; + char imageFileName[PATH_MAX]; bool generateImage; bool warmup; bool verbose; @@ -47,6 +47,7 @@ void printConfiguration(const HeatConfiguration *conf); void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset); double getTime(void); +const char *summary(void); double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData); void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); double 
computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); diff --git a/src/heat/common/main.c b/src/heat/common/main.c new file mode 100644 index 0000000..9ff1a91 --- /dev/null +++ b/src/heat/common/main.c @@ -0,0 +1,52 @@ +#include +#include +#include + +#include "common/heat.h" + + +int main(int argc, char **argv) +{ + HeatConfiguration conf; + readConfiguration(argc, argv, &conf); + refineConfiguration(&conf, conf.rbs, conf.cbs); + if (conf.verbose) + printConfiguration(&conf); + + int64_t rows = conf.rows+2; + int64_t cols = conf.cols+2; + + initialize(&conf, rows, cols, 0); + + if (conf.warmup) + solve(&conf, rows, cols, 1, NULL); + + // Solve the problem + double start = getTime(); + double residual = solve(&conf, rows, cols, conf.timesteps, NULL); + double end = getTime(); + + int64_t totalElements = conf.rows*conf.cols; + double throughput = (totalElements*conf.timesteps)/(end-start); + +#ifdef _OMPSS_2 + int threads = sysconf(_SC_NPROCESSORS_ONLN); +#else + int threads = 1; +#endif + + fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s", + "rows", "cols", "rbs", "cbs", "threads", + "steps", "error", "time", "updates/s\n"); + fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n", + conf.rows, conf.cols, + conf.rbs, conf.cbs, threads, + conf.convergenceTimesteps, residual, end-start, throughput); + + if (conf.generateImage) + writeImage(conf.imageFileName, conf.matrix, rows, cols); + + finalize(&conf); + + return 0; +} diff --git a/src/heat/common/misc.c b/src/heat/common/misc.c index 73a30c8..922892b 100644 --- a/src/heat/common/misc.c +++ b/src/heat/common/misc.c @@ -1,11 +1,11 @@ -#define _POSIX_C_SOURCE 200809L - #include #include #include +#include +#include #include -#include #include +#include #include #include #include @@ -105,7 +105,11 @@ static void printUsage(int argc, char **argv) { (void) argc; - fprintf(stdout, "Usage: %s [OPTION]...\n", 
argv[0]); + const char *prog = basename(argv[0]); + fprintf(stdout, "%s - %s\n", prog, summary()); + fprintf(stdout, "\n"); + fprintf(stdout, "Usage: %s [OPTION]...\n", prog); + fprintf(stdout, "\n"); fprintf(stdout, "Parameters:\n"); fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n"); fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n"); @@ -176,7 +180,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) conf->verbose = true; break; case 'f': - if (strlen(optarg) >= MAX_STRING_SIZE) { + if (strlen(optarg) >= PATH_MAX) { fprintf(stderr, "Error: Configuration name is too long!\n"); exit(1); } @@ -185,7 +189,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) case 'o': conf->generateImage = true; conf->warmup = false; - if (strlen(optarg) >= MAX_STRING_SIZE) { + if (strlen(optarg) >= PATH_MAX) { fprintf(stderr, "Error: Image name is too long!\n"); exit(1); } @@ -263,8 +267,8 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) static void readSourcesFile(HeatConfiguration *conf, FILE *file) { - char line[MAX_STRING_SIZE]; - if (!fgets(line, MAX_STRING_SIZE, file)) { + char line[4096]; + if (!fgets(line, 4096, file)) { fprintf(stderr, "Error: Configuration file is not correct!\n"); exit(1); } @@ -283,7 +287,7 @@ static void readSourcesFile(HeatConfiguration *conf, FILE *file) assert(conf->heatSources != NULL); for (int i = 0; i < conf->numHeatSources; i++) { - if (!fgets(line, MAX_STRING_SIZE, file)) { + if (!fgets(line, 4096, file)) { fprintf(stderr, "Error: Configuration file is not correct!\n"); exit(1); } diff --git a/src/heat/mpi/CMakeLists.txt b/src/heat/mpi/CMakeLists.txt index 28e51de..bf9ac14 100644 --- a/src/heat/mpi/CMakeLists.txt +++ b/src/heat/mpi/CMakeLists.txt @@ -2,34 +2,36 @@ if(NOT MPI_FOUND) return() endif() +macro(mk_heat_mpi NAME SOURCE) + mk_bench(${NAME}) + target_sources(${NAME} PRIVATE ${SOURCE}) + 
target_link_libraries(${NAME} PRIVATE heat_mpi_common) +endmacro() + +macro(mk_heat_mpi_nanos6 NAME SOURCE) + mk_heat_mpi(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) +endmacro() + +macro(mk_heat_mpi_nodes NAME SOURCE) + mk_heat_mpi(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nodes::wrapper) +endmacro() + +# ------------------------------------------------------------------- + add_library(heat_mpi_common STATIC main.c utils.c) target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C) -macro(mk_heat_mpi NAME SOURCE) - add_executable(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE heat_mpi_common) - install(TARGETS ${NAME} RUNTIME DESTINATION bin) -endmacro() - -mk_heat_mpi(heat_mpi solver_mpi.c) -mk_heat_mpi(heat_mpi_nbuffer solver_mpi_nbuffer.c) +mk_heat_mpi(b6_heat_mpi solver_mpi.c) +mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c) if(NANOS6_FOUND) - macro(mk_heat_mpi_nanos6 NAME SOURCE) - mk_heat_mpi(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) - endmacro() - - mk_heat_mpi_nanos6(heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c) - mk_heat_mpi_nanos6(heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c) + mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c) + mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c) endif() if(NODES_FOUND) - macro(mk_heat_mpi_nodes NAME SOURCE) - mk_heat_mpi(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nodes::wrapper) - endmacro() - - mk_heat_mpi_nodes(heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c) - mk_heat_mpi_nodes(heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c) + mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c) + mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c) endif() diff --git a/src/heat/mpi/solver_mpi.c b/src/heat/mpi/solver_mpi.c index 4c44f61..42f6932 100644 --- a/src/heat/mpi/solver_mpi.c +++ 
b/src/heat/mpi/solver_mpi.c @@ -3,6 +3,11 @@ #include "utils.h" #include "common/heat.h" +const char * +summary(void) +{ + return "Parallel version using MPI and blocking primitives"; +} static inline void send(const double *data, int nelems, int dst, int tag) { diff --git a/src/heat/mpi/solver_mpi_nbuffer.c b/src/heat/mpi/solver_mpi_nbuffer.c index 457ea82..52a1d78 100644 --- a/src/heat/mpi/solver_mpi_nbuffer.c +++ b/src/heat/mpi/solver_mpi_nbuffer.c @@ -8,6 +8,13 @@ typedef struct { MPI_Request recv; } HaloRequests; +const char * +summary(void) +{ + return "Parallel version using MPI and non-blocking primitives with\n" + "overlap of computation and communication phases"; +} + static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs) { MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send); diff --git a/src/heat/mpi/solver_mpi_ompss2_forkjoin.c b/src/heat/mpi/solver_mpi_ompss2_forkjoin.c index 02a9db1..72f7e35 100644 --- a/src/heat/mpi/solver_mpi_ompss2_forkjoin.c +++ b/src/heat/mpi/solver_mpi_ompss2_forkjoin.c @@ -3,6 +3,12 @@ #include "utils.h" #include "common/heat.h" +const char * +summary(void) +{ + return "Parallel version using MPI + OmpSs-2 following a fork-join\n" + "parallelization."; +} static inline void send(const double *data, int nelems, int dst, int tag) { diff --git a/src/heat/mpi/solver_mpi_ompss2_tasks.c b/src/heat/mpi/solver_mpi_ompss2_tasks.c index 15a7bd2..c60cb60 100644 --- a/src/heat/mpi/solver_mpi_ompss2_tasks.c +++ b/src/heat/mpi/solver_mpi_ompss2_tasks.c @@ -3,9 +3,15 @@ #include "utils.h" #include "common/heat.h" - static int serial; +const char * +summary(void) +{ + return "Parallel version using MPI + OmpSs-2 tasks where communication\n" + "tasks are serialized"; +} + static inline void send(const double *data, int nelems, int dst, int tag) { MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD); diff --git a/src/heat/smp/CMakeLists.txt b/src/heat/smp/CMakeLists.txt index 
b52f3f3..4763b2f 100644 --- a/src/heat/smp/CMakeLists.txt +++ b/src/heat/smp/CMakeLists.txt @@ -1,25 +1,32 @@ -add_library(heat_seq_common STATIC main.c) -target_link_libraries(heat_seq_common PUBLIC heat_common) +macro(mk_heat_smp NAME SOURCE) + mk_bench(${NAME}) + target_sources(${NAME} PRIVATE ${SOURCE}) + target_link_libraries(${NAME} PRIVATE heat_smp_common) +endmacro() -add_executable(heat_seq solver_seq.c) -target_link_libraries(heat_seq PUBLIC heat_seq_common) +macro(mk_heat_nanos6 NAME SOURCE) + mk_heat_smp(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) +endmacro() + +macro(mk_heat_nodes NAME SOURCE) + mk_heat_smp(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nodes::wrapper) +endmacro() + +# ------------------------------------------------------------------- + +add_library(heat_smp_common STATIC main.c) +target_link_libraries(heat_smp_common PUBLIC heat_common) + +mk_heat_smp(b6_heat_seq solver_seq.c) if(NANOS6_FOUND) - macro(mk_heat_nanos6 NAME SOURCE) - add_executable(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE heat_seq_common Nanos6::wrapper) - install(TARGETS ${NAME} RUNTIME DESTINATION bin) - endmacro() - mk_heat_nanos6(heat_nanos6 solver_ompss2.c) - mk_heat_nanos6(heat_nanos6_residual solver_ompss2_residual.c) + mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c) + mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c) endif() if(NODES_FOUND) - macro(mk_heat_nodes NAME SOURCE) - add_executable(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE heat_seq_common Nodes::wrapper) - install(TARGETS ${NAME} RUNTIME DESTINATION bin) - endmacro() - mk_heat_nodes(heat_nodes solver_ompss2.c) - mk_heat_nodes(heat_nodes_residual solver_ompss2_residual.c) + mk_heat_nodes(b6_heat_nodes solver_ompss2.c) + mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c) endif() diff --git a/src/heat/smp/main.c b/src/heat/smp/main.c index 9ff1a91..a5c3577 100644 --- a/src/heat/smp/main.c +++ 
b/src/heat/smp/main.c @@ -35,13 +35,16 @@ int main(int argc, char **argv) int threads = 1; #endif - fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s", - "rows", "cols", "rbs", "cbs", "threads", - "steps", "error", "time", "updates/s\n"); - fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n", + fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n", + "time", "updates/s", "error", + "rows", "cols", + "rbs", "cbs", "threads", + "steps"); + fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n", + end-start, throughput, residual, conf.rows, conf.cols, conf.rbs, conf.cbs, threads, - conf.convergenceTimesteps, residual, end-start, throughput); + conf.convergenceTimesteps); if (conf.generateImage) writeImage(conf.imageFileName, conf.matrix, rows, cols); diff --git a/src/heat/smp/solver_ompss2.c b/src/heat/smp/solver_ompss2.c index d2080a1..36b009e 100644 --- a/src/heat/smp/solver_ompss2.c +++ b/src/heat/smp/solver_ompss2.c @@ -1,5 +1,10 @@ #include "common/heat.h" +const char * +summary(void) +{ + return "Parallel version using OmpSs-2 tasks"; +} static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb]) { diff --git a/src/heat/smp/solver_ompss2_residual.c b/src/heat/smp/solver_ompss2_residual.c index a6f8f6e..03baa7f 100644 --- a/src/heat/smp/solver_ompss2_residual.c +++ b/src/heat/smp/solver_ompss2_residual.c @@ -2,6 +2,12 @@ #include "common/heat.h" +const char * +summary(void) +{ + return "Parallel version using OmpSs-2 tasks and taking into account the\n" + "residual"; +} static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual) { diff --git a/src/heat/smp/solver_ompss2_taskloop.c b/src/heat/smp/solver_ompss2_taskloop.c index 20eec9f..7bf065b 100644 --- a/src/heat/smp/solver_ompss2_taskloop.c +++ b/src/heat/smp/solver_ompss2_taskloop.c @@ -1,5 +1,10 @@ 
#include "common/heat.h" +const char * +summary(void) +{ + return "Parallel version using OmpSs-2 tasklook"; +} static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb]) { diff --git a/src/heat/smp/solver_seq.c b/src/heat/smp/solver_seq.c index 700a3c8..456ac86 100644 --- a/src/heat/smp/solver_seq.c +++ b/src/heat/smp/solver_seq.c @@ -1,5 +1,10 @@ #include "common/heat.h" +const char * +summary(void) +{ + return "Sequential solver with one CPU"; +} static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols]) { diff --git a/src/ompss2/CMakeLists.txt b/src/ompss2/CMakeLists.txt new file mode 100644 index 0000000..25e1ea2 --- /dev/null +++ b/src/ompss2/CMakeLists.txt @@ -0,0 +1,12 @@ +macro(mk_nanos6 NAME SOURCE) + mk_bench(${NAME}) + target_sources(${NAME} PRIVATE ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) +endmacro() + +if(NANOS6_FOUND) + mk_bench6(b6_nanos6_register_deps register_deps.c) + mk_bench6(b6_nanos6_sched_add sched_add.c) + mk_bench6(b6_nanos6_sched_get sched_get.c) + mk_bench6(b6_nanos6_readywave readywave.c) +endif() diff --git a/README.md b/src/ompss2/README.md similarity index 75% rename from README.md rename to src/ompss2/README.md index 7ed7f9b..e8764f8 100644 --- a/README.md +++ b/src/ompss2/README.md @@ -1,6 +1,4 @@ -## Bench6: A set of micro-benchmarks for Nanos6 - -This repository contains a set of microbenchmarks for Nanos6, +This directory contains a set of microbenchmarks for Nanos6, specifically target to expose the limitations of the runtime following the breakdown analysis. 
diff --git a/examples/readywave-instr.csv.png b/src/ompss2/examples/readywave-instr.csv.png similarity index 100% rename from examples/readywave-instr.csv.png rename to src/ompss2/examples/readywave-instr.csv.png diff --git a/examples/register_deps.csv.png b/src/ompss2/examples/register_deps.csv.png similarity index 100% rename from examples/register_deps.csv.png rename to src/ompss2/examples/register_deps.csv.png diff --git a/examples/sched_add.csv.png b/src/ompss2/examples/sched_add.csv.png similarity index 100% rename from examples/sched_add.csv.png rename to src/ompss2/examples/sched_add.csv.png diff --git a/examples/sched_get.csv.png b/src/ompss2/examples/sched_get.csv.png similarity index 100% rename from examples/sched_get.csv.png rename to src/ompss2/examples/sched_get.csv.png diff --git a/src/bench6/readywave.c b/src/ompss2/readywave.c similarity index 100% rename from src/bench6/readywave.c rename to src/ompss2/readywave.c diff --git a/src/bench6/register_deps.c b/src/ompss2/register_deps.c similarity index 100% rename from src/bench6/register_deps.c rename to src/ompss2/register_deps.c diff --git a/src/bench6/sched_add.c b/src/ompss2/sched_add.c similarity index 100% rename from src/bench6/sched_add.c rename to src/ompss2/sched_add.c diff --git a/src/bench6/sched_get.c b/src/ompss2/sched_get.c similarity index 100% rename from src/bench6/sched_get.c rename to src/ompss2/sched_get.c diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt new file mode 100644 index 0000000..2c6ed35 --- /dev/null +++ b/src/tools/CMakeLists.txt @@ -0,0 +1,7 @@ +get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list) +configure_file(config.in.h config.h) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_executable(bench6_runner runner.c) +target_link_libraries(bench6_runner PRIVATE m bench6_lib) +install(TARGETS bench6_runner RUNTIME DESTINATION bin) diff --git a/src/tools/config.in.h b/src/tools/config.in.h new file mode 100644 index 0000000..3e51d90 --- 
/dev/null +++ b/src/tools/config.in.h @@ -0,0 +1,11 @@ +/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef CONFIG_H +#define CONFIG_H + +#define BENCH6_PREFIX "@CMAKE_INSTALL_PREFIX@" +#define BENCH6_BIN (BENCH6_PREFIX "/bin") +#define BENCH6_LIST "@BENCH6_LIST@" + +#endif /* CONFIG_H */ diff --git a/src/tools/runner.c b/src/tools/runner.c new file mode 100644 index 0000000..685da3d --- /dev/null +++ b/src/tools/runner.c @@ -0,0 +1,199 @@ +#include "common.h" +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include + +//static void +//usage(void) +//{ +// exit(1); +//} + +struct sampling { + int nmax; + int nmin; + int n; + double *samples; + double rse; + double last; +}; + +static int +do_run(char *argv[], double *ptime) +{ + /* Gather binary path */ + char path[PATH_MAX]; + sprintf(path, "%s/%s", BENCH6_BIN, argv[0]); + + if (access(path, R_OK | X_OK) != 0) { + err("cannot find benchmark %s:", path); + return -1; + } + + int pipefd[2]; + if (pipe(pipefd) != 0) { + err("pipe failed:"); + return -1; + } + + /* Fork */ + pid_t p = fork(); + + if (p < 0) { + err("fork failed:"); + return -1; + } + + /* In children execute benchmark */ + if (p == 0) { + close(pipefd[0]); + dup2(pipefd[1], 1); + close(2); + if (execve(path, argv, NULL) != 0) { + err("execve failed:"); + return -1; + } + /* Not reached */ + } else { + close(pipefd[1]); + char line[4096]; + FILE *f = fdopen(pipefd[0], "r"); + if (f == NULL) { + err("fdopen failed:"); + return -1; + } + + if (fgets(line, 4096, f) == NULL) { + err("missing stdout line"); + return -1; + } + + char *nl = strchr(line, '\n'); + if (nl != NULL) + *nl = '\0'; + + double time; + sscanf(line, "%le", &time); + //printf("got %e\n", time); + *ptime = time; + + /* Drain the rest of the stdout */ + while (fgets(line, 4096, f) != NULL) { } + fclose(f); + close(pipefd[0]); + } + + return 0; +} + +static void +stats(struct 
sampling *s) +{ + if (s->n < 2) + return; + + double n = s->n; + double sum = 0.0; + for (int i = 0; i < s->n; i++) + sum += s->samples[i]; + + double mean = sum / n; + double sumsqr = 0.0; + for (int i = 0; i < s->n; i++) { + double dev = s->samples[i] - mean; + sumsqr += dev * dev; + } + + double var = sumsqr / n; + double stdev = sqrt(var); + double se = stdev / sqrt(n); + double rse = se * 1.96 / mean; + + fprintf(stderr, "\rn=%d last=%e mean=%e stdev=%e se=%e rse=%e", + s->n, s->last, mean, stdev, se, rse); + + s->rse = rse; +} + +static int +should_continue(struct sampling *s) +{ + stats(s); + + if (s->n < s->nmin) + return 1; + + if (s->rse * 100.0 > 1.0 /* % */) + return 1; + + return 0; +} + +static void +add_sample(struct sampling *s, double time) +{ + if (s->n >= s->nmax) { + die("overflowing samples"); + } else { + s->samples[s->n] = time; + s->n++; + s->last = time; + } +} + +//static int +//compare_double(const void *a, const void *b) +//{ +// double aa = *(const double *) a; +// double bb = *(const double *) b; +// +// if (aa < bb) +// return -1; +// else if (aa > bb) +// return +1; +// else +// return 0; +//} + +static int +sample(char *argv[]) +{ + struct sampling s = { 0 }; + s.nmax = 4000; + s.nmin = 30; + s.samples = calloc(s.nmax, sizeof(double)); + s.n = 0; + + while (should_continue(&s)) { + double time; + if (do_run(argv, &time) != 0) { + err("failed to run benchmark"); + return 1; + } + + add_sample(&s, time); + } + + free(s.samples); + + return 0; +} + +int +main(int argc, char *argv[]) +{ + (void) argc; + + if (sample(argv+1) != 0) { + err("failed to sample the benchmark"); + return 1; + } + + return 0; +}