Reorganice sources

This commit is contained in:
Rodrigo Arias 2023-05-22 18:25:19 +02:00
parent 78bd792cf1
commit dd04c180a7
36 changed files with 567 additions and 100 deletions

3
.gitignore vendored
View File

@ -1 +1,4 @@
data data
build/
install/
tags

View File

@ -18,9 +18,6 @@ add_compile_options(
-Werror -Werror
) )
set(CMAKE_C_COMPILER "clang")
set(CMAKE_CXX_COMPILER "clang++")
set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
@ -76,6 +73,23 @@ find_package(MPI)
find_package(Nanos6) find_package(Nanos6)
find_package(Nodes) find_package(Nodes)
set_property(GLOBAL PROPERTY bench6_list "")
# mk_bench(NAME)
#
# Declare the benchmark executable NAME (without sources), record it in the
# global `bench6_list` property and install it under bin/. The caller is
# expected to attach sources and libraries afterwards with target_sources()
# and target_link_libraries().
macro(mk_bench NAME)
  # The pattern is anchored: an unanchored "b6_.*" would also accept names
  # that merely contain "b6_" somewhere in the middle.
  if(NOT "${NAME}" MATCHES "^b6_")
    message(FATAL_ERROR "benchmark name must begin with b6_: ${NAME}")
  endif()
  add_executable(${NAME})
  # Append straight to the global property so that no helper variable leaks
  # into the scope of the caller (macros do not create a new scope).
  set_property(GLOBAL APPEND PROPERTY bench6_list ${NAME})
  install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
add_subdirectory(src) add_subdirectory(src)
include(FeatureSummary) include(FeatureSummary)

View File

@ -1,2 +1,4 @@
add_subdirectory(bench6) add_subdirectory(bench6)
add_subdirectory(ompss2)
add_subdirectory(heat) add_subdirectory(heat)
add_subdirectory(tools)

View File

@ -1,13 +1,2 @@
add_library(bench6_common STATIC common.c) add_library(bench6_lib STATIC bench6.c common.c)
target_include_directories(bench6_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
macro(mk_bench6 NAME SOURCE)
add_executable(${NAME} ${SOURCE})
target_link_libraries(${NAME} PRIVATE bench6_common Nanos6::wrapper)
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
# FIXME: broken in last clang
#mk_bench6(bench6_register_deps register_deps.c)
mk_bench6(bench6_sched_add sched_add.c)
mk_bench6(bench6_sched_get sched_get.c)
mk_bench6(bench6_readywave readywave.c)

35
src/bench6/bench6.c Normal file
View File

@ -0,0 +1,35 @@
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "bench6.h"
//#include <nanos6/debug.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
/* Read CLOCK_MONOTONIC and return its value in seconds as a double.
 * If the clock cannot be read, an error is printed with perror() and the
 * process exits with EXIT_FAILURE. */
double bench6_time(void)
{
	struct timespec now;
	int rc = clock_gettime(CLOCK_MONOTONIC, &now);

	if (rc != 0) {
		perror("clock_gettime failed");
		exit(EXIT_FAILURE);
	}

	/* Whole seconds plus the nanosecond remainder scaled to seconds */
	double secs = (double) now.tv_sec;
	double frac = (double) now.tv_nsec * 1.0e-9;

	return secs + frac;
}
/* Report the number of CPUs. The Nanos6 query is currently disabled, so
 * this placeholder always returns -1. */
int get_ncpus(void)
{
	/* Disabled for now: (int) nanos6_get_num_cpus() */
	const int ncpus = -1;

	return ncpus;
}
/* Write the measured time to stdout as a single line of the form
 * "time <value>", with the value printed in %e notation. */
void
bench6_report(double time)
{
	fprintf(stdout, "time %e\n", time);
}

View File

@ -4,14 +4,9 @@
#ifndef BENCH6_H #ifndef BENCH6_H
#define BENCH6_H #define BENCH6_H
#define UNUSED(x) (void)(x) double bench6_time(void);
void bench6_report(double time);
double get_time(void);
int get_ncpus(void); int get_ncpus(void);
int bench6_creator(int argc, char *argv[]);
int bench6_sched_get(int argc, char *argv[]);
int bench6_sched_add(int argc, char *argv[]);
int bench6_register_deps(int argc, char *argv[]);
#endif /* BENCH6_H */ #endif /* BENCH6_H */

View File

@ -1,28 +1,69 @@
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC) /* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */ * SPDX-License-Identifier: GPL-3.0-or-later */
#include "bench6.h" #include "common.h"
#include <nanos6/debug.h>
#include <time.h>
#include <stdio.h> #include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h> #include <stdlib.h>
/* Returns the current time in seconds since some point in the past */ char *progname = NULL;
double get_time(void) int is_debug_enabled = 0;
void
progname_set(char *name)
{ {
struct timespec tv; progname = name;
if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0)
{
perror("clock_gettime failed");
exit(EXIT_FAILURE);
} }
return (double)(tv.tv_sec) + void
(double)tv.tv_nsec * 1.0e-9; enable_debug(void)
{
is_debug_enabled = 1;
} }
int get_ncpus(void) static void
vaerr(const char *prefix, const char *func, const char *errstr, va_list ap)
{ {
return (int) nanos6_get_num_cpus(); if (progname != NULL)
fprintf(stderr, "%s: ", progname);
if (prefix != NULL)
fprintf(stderr, "%s: ", prefix);
if (func != NULL)
fprintf(stderr, "%s: ", func);
vfprintf(stderr, errstr, ap);
int len = strlen(errstr);
if (len > 0) {
char last = errstr[len - 1];
if (last == ':')
fprintf(stderr, " %s\n", strerror(errno));
else if (last != '\n' && last != '\r')
fprintf(stderr, "\n");
}
}
/* Print a diagnostic message to stderr.
 *
 * Collects the variadic arguments and forwards them, together with the
 * prefix, function name and printf-style format errstr, to vaerr(), which
 * does the actual formatting. Does not terminate the program. */
void
verr(const char *prefix, const char *func, const char *errstr, ...)
{
va_list ap;
va_start(ap, errstr);
vaerr(prefix, func, errstr, ap);
va_end(ap);
}
void
vdie(const char *prefix, const char *func, const char *errstr, ...)
{
va_list ap;
va_start(ap, errstr);
vaerr(prefix, func, errstr, ap);
va_end(ap);
abort();
} }

46
src/bench6/common.h Normal file
View File

@ -0,0 +1,46 @@
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef COMMON_H
#define COMMON_H
#include <stdio.h>
/* Non-zero once enable_debug() has been called; checked by dbg() */
extern int is_debug_enabled;
/* Diagnostic helpers */
/* Store the program name that is prepended to every message */
void progname_set(char *name);
/* Turn on the output of dbg() messages */
void enable_debug(void);
/* Print a message to stderr built from the optional prefix, the optional
 * function name and the printf-style format errstr */
void verr(const char *prefix, const char *func, const char *errstr, ...);
/* Same as verr(), but calls abort() after printing the message */
void vdie(const char *prefix, const char *func, const char *errstr, ...);
/* clang-format off */
/* Raw message to stderr, without any prefix */
#define rerr(...) fprintf(stderr, __VA_ARGS__)
/* Error message tagged with the calling function */
#define err(...) verr("ERROR", __func__, __VA_ARGS__)
/* Fatal error tagged with the calling function; aborts the program */
#define die(...) vdie("FATAL", __func__, __VA_ARGS__)
/* Informative message without function name */
#define info(...) verr("INFO", NULL, __VA_ARGS__)
/* Informative message tagged with the calling function */
#define finfo(...) verr("INFO", __func__, __VA_ARGS__)
/* Warning message without function name */
#define warn(...) verr("WARN", NULL, __VA_ARGS__)
/* Print a DEBUG message tagged with the calling function, only when
 * is_debug_enabled is set. The do { } while (0) wrapper carries no trailing
 * semicolon, so that `dbg(...);` expands to exactly one statement and can be
 * used safely as the body of an if/else. */
#define dbg(...) do { \
	if (unlikely(is_debug_enabled)) verr("DEBUG", __func__, __VA_ARGS__); \
} while (0)
/* Branch prediction hints: the condition is expected to be true (likely)
 * or false (unlikely) */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Mark a variable or parameter as intentionally unused */
#define UNUSED(x) (void)(x)
/* Poison assert: any later use of the identifier `assert` in a file that
 * includes this header is rejected by the compiler */
#pragma GCC poison assert
/* Ask the compiler to warn when the return value of a function is ignored */
#define USE_RET __attribute__((warn_unused_result))
/* Number of elements of an array (must be a real array, not a pointer) */
#define ARRAYLEN(x) (sizeof(x)/sizeof((x)[0]))
/* clang-format on */
#endif /* COMMON_H */

View File

@ -1,5 +1,6 @@
add_library(heat_common STATIC common/misc.c common/kernel.c) add_library(heat_common STATIC common/misc.c common/kernel.c)
target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(heat_common PUBLIC m)
add_subdirectory(smp) add_subdirectory(smp)
add_subdirectory(mpi) add_subdirectory(mpi)

View File

@ -3,11 +3,11 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <limits.h>
#define IGNORE_RESIDUAL ((double) -1.0) #define IGNORE_RESIDUAL ((double) -1.0)
#define DEFAULT_DELTA ((double) 0.00005) #define DEFAULT_DELTA ((double) 0.00005)
#define DEFAULT_BS 1024 #define DEFAULT_BS 1024
#define MAX_STRING_SIZE 100
#define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b))
@ -30,8 +30,8 @@ typedef struct {
double *matrix; double *matrix;
int numHeatSources; int numHeatSources;
HeatSource *heatSources; HeatSource *heatSources;
char confFileName[MAX_STRING_SIZE]; char confFileName[PATH_MAX];
char imageFileName[MAX_STRING_SIZE]; char imageFileName[PATH_MAX];
bool generateImage; bool generateImage;
bool warmup; bool warmup;
bool verbose; bool verbose;
@ -47,6 +47,7 @@ void printConfiguration(const HeatConfiguration *conf);
void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset); void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset);
double getTime(void); double getTime(void);
const char *summary(void);
double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData); double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData);
void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);

52
src/heat/common/main.c Normal file
View File

@ -0,0 +1,52 @@
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include "common/heat.h"
/* Entry point of the heat benchmark.
 *
 * Reads and refines the configuration, initializes the problem, optionally
 * performs one warmup step, and then times a call to solve() over
 * conf.timesteps steps. A header row is written to stderr and the matching
 * row of results to stdout. When requested, the resulting matrix is written
 * as an image before finalizing.
 *
 * Always returns 0. */
int main(int argc, char **argv)
{
	HeatConfiguration conf;
	readConfiguration(argc, argv, &conf);
	refineConfiguration(&conf, conf.rbs, conf.cbs);
	if (conf.verbose)
		printConfiguration(&conf);

	/* The working matrix has two more rows and columns than configured */
	int64_t rows = conf.rows+2;
	int64_t cols = conf.cols+2;

	initialize(&conf, rows, cols, 0);

	if (conf.warmup)
		solve(&conf, rows, cols, 1, NULL);

	// Solve the problem
	double start = getTime();
	double residual = solve(&conf, rows, cols, conf.timesteps, NULL);
	double end = getTime();

	int64_t totalElements = conf.rows*conf.cols;
	double throughput = (totalElements*conf.timesteps)/(end-start);

#ifdef _OMPSS_2
	int threads = (int) sysconf(_SC_NPROCESSORS_ONLN);
#else
	int threads = 1;
#endif

	/* The newline belongs to the format: passing it inside the last %14s
	 * argument shifts that column by one with respect to the data row. */
	fprintf(stderr, "%8s %8s %8s %8s %8s %8s %14s %14s %14s\n",
		"rows", "cols", "rbs", "cbs", "threads",
		"steps", "error", "time", "updates/s");
	/* Print the 64-bit sizes through long long so that the format is
	 * correct on every platform, not only where int64_t is long. */
	fprintf(stdout, "%8lld %8lld %8d %8d %8d %8d %14e %14e %14e\n",
		(long long) conf.rows, (long long) conf.cols,
		conf.rbs, conf.cbs, threads,
		conf.convergenceTimesteps, residual, end-start, throughput);

	if (conf.generateImage)
		writeImage(conf.imageFileName, conf.matrix, rows, cols);

	finalize(&conf);

	return 0;
}

View File

@ -1,11 +1,11 @@
#define _POSIX_C_SOURCE 200809L
#include <assert.h> #include <assert.h>
#include <float.h> #include <float.h>
#include <getopt.h> #include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <math.h> #include <math.h>
#include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
@ -105,7 +105,11 @@ static void printUsage(int argc, char **argv)
{ {
(void) argc; (void) argc;
fprintf(stdout, "Usage: %s [OPTION]...\n", argv[0]); const char *prog = basename(argv[0]);
fprintf(stdout, "%s - %s\n", prog, summary());
fprintf(stdout, "\n");
fprintf(stdout, "Usage: %s [OPTION]...\n", prog);
fprintf(stdout, "\n");
fprintf(stdout, "Parameters:\n"); fprintf(stdout, "Parameters:\n");
fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n"); fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n");
fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n"); fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n");
@ -176,7 +180,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
conf->verbose = true; conf->verbose = true;
break; break;
case 'f': case 'f':
if (strlen(optarg) >= MAX_STRING_SIZE) { if (strlen(optarg) >= PATH_MAX) {
fprintf(stderr, "Error: Configuration name is too long!\n"); fprintf(stderr, "Error: Configuration name is too long!\n");
exit(1); exit(1);
} }
@ -185,7 +189,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
case 'o': case 'o':
conf->generateImage = true; conf->generateImage = true;
conf->warmup = false; conf->warmup = false;
if (strlen(optarg) >= MAX_STRING_SIZE) { if (strlen(optarg) >= PATH_MAX) {
fprintf(stderr, "Error: Image name is too long!\n"); fprintf(stderr, "Error: Image name is too long!\n");
exit(1); exit(1);
} }
@ -263,8 +267,8 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
static void readSourcesFile(HeatConfiguration *conf, FILE *file) static void readSourcesFile(HeatConfiguration *conf, FILE *file)
{ {
char line[MAX_STRING_SIZE]; char line[4096];
if (!fgets(line, MAX_STRING_SIZE, file)) { if (!fgets(line, 4096, file)) {
fprintf(stderr, "Error: Configuration file is not correct!\n"); fprintf(stderr, "Error: Configuration file is not correct!\n");
exit(1); exit(1);
} }
@ -283,7 +287,7 @@ static void readSourcesFile(HeatConfiguration *conf, FILE *file)
assert(conf->heatSources != NULL); assert(conf->heatSources != NULL);
for (int i = 0; i < conf->numHeatSources; i++) { for (int i = 0; i < conf->numHeatSources; i++) {
if (!fgets(line, MAX_STRING_SIZE, file)) { if (!fgets(line, 4096, file)) {
fprintf(stderr, "Error: Configuration file is not correct!\n"); fprintf(stderr, "Error: Configuration file is not correct!\n");
exit(1); exit(1);
} }

View File

@ -2,34 +2,36 @@ if(NOT MPI_FOUND)
return() return()
endif() endif()
add_library(heat_mpi_common STATIC main.c utils.c)
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
macro(mk_heat_mpi NAME SOURCE) macro(mk_heat_mpi NAME SOURCE)
add_executable(${NAME} ${SOURCE}) mk_bench(${NAME})
target_sources(${NAME} PRIVATE ${SOURCE})
target_link_libraries(${NAME} PRIVATE heat_mpi_common) target_link_libraries(${NAME} PRIVATE heat_mpi_common)
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro() endmacro()
mk_heat_mpi(heat_mpi solver_mpi.c)
mk_heat_mpi(heat_mpi_nbuffer solver_mpi_nbuffer.c)
if(NANOS6_FOUND)
macro(mk_heat_mpi_nanos6 NAME SOURCE) macro(mk_heat_mpi_nanos6 NAME SOURCE)
mk_heat_mpi(${NAME} ${SOURCE}) mk_heat_mpi(${NAME} ${SOURCE})
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
endmacro() endmacro()
mk_heat_mpi_nanos6(heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
mk_heat_mpi_nanos6(heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
endif()
if(NODES_FOUND)
macro(mk_heat_mpi_nodes NAME SOURCE) macro(mk_heat_mpi_nodes NAME SOURCE)
mk_heat_mpi(${NAME} ${SOURCE}) mk_heat_mpi(${NAME} ${SOURCE})
target_link_libraries(${NAME} PRIVATE Nodes::wrapper) target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
endmacro() endmacro()
mk_heat_mpi_nodes(heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c) # -------------------------------------------------------------------
mk_heat_mpi_nodes(heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
add_library(heat_mpi_common STATIC main.c utils.c)
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
mk_heat_mpi(b6_heat_mpi solver_mpi.c)
mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c)
if(NANOS6_FOUND)
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
endif()
if(NODES_FOUND)
mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c)
mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
endif() endif()

View File

@ -3,6 +3,11 @@
#include "utils.h" #include "utils.h"
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI and blocking primitives";

	return text;
}
static inline void send(const double *data, int nelems, int dst, int tag) static inline void send(const double *data, int nelems, int dst, int tag)
{ {

View File

@ -8,6 +8,13 @@ typedef struct {
MPI_Request recv; MPI_Request recv;
} HaloRequests; } HaloRequests;
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI and non-blocking primitives with\n"
		"overlap of computation and communication phases";

	return text;
}
static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs) static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs)
{ {
MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send); MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send);

View File

@ -3,6 +3,12 @@
#include "utils.h" #include "utils.h"
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI + OmpSs-2 following a fork-join\n"
		"parallelization.";

	return text;
}
static inline void send(const double *data, int nelems, int dst, int tag) static inline void send(const double *data, int nelems, int dst, int tag)
{ {

View File

@ -3,9 +3,15 @@
#include "utils.h" #include "utils.h"
#include "common/heat.h" #include "common/heat.h"
static int serial; static int serial;
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI + OmpSs-2 tasks where communication\n"
		"tasks are serialized";

	return text;
}
static inline void send(const double *data, int nelems, int dst, int tag) static inline void send(const double *data, int nelems, int dst, int tag)
{ {
MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD); MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD);

View File

@ -1,25 +1,32 @@
add_library(heat_seq_common STATIC main.c) macro(mk_heat_smp NAME SOURCE)
target_link_libraries(heat_seq_common PUBLIC heat_common) mk_bench(${NAME})
target_sources(${NAME} PRIVATE ${SOURCE})
target_link_libraries(${NAME} PRIVATE heat_smp_common)
endmacro()
add_executable(heat_seq solver_seq.c) macro(mk_heat_nanos6 NAME SOURCE)
target_link_libraries(heat_seq PUBLIC heat_seq_common) mk_heat_smp(${NAME} ${SOURCE})
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
endmacro()
macro(mk_heat_nodes NAME SOURCE)
mk_heat_smp(${NAME} ${SOURCE})
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
endmacro()
# -------------------------------------------------------------------
add_library(heat_smp_common STATIC main.c)
target_link_libraries(heat_smp_common PUBLIC heat_common)
mk_heat_smp(b6_heat_seq solver_seq.c)
if(NANOS6_FOUND) if(NANOS6_FOUND)
macro(mk_heat_nanos6 NAME SOURCE) mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c)
add_executable(${NAME} ${SOURCE}) mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c)
target_link_libraries(${NAME} PRIVATE heat_seq_common Nanos6::wrapper)
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
mk_heat_nanos6(heat_nanos6 solver_ompss2.c)
mk_heat_nanos6(heat_nanos6_residual solver_ompss2_residual.c)
endif() endif()
if(NODES_FOUND) if(NODES_FOUND)
macro(mk_heat_nodes NAME SOURCE) mk_heat_nodes(b6_heat_nodes solver_ompss2.c)
add_executable(${NAME} ${SOURCE}) mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c)
target_link_libraries(${NAME} PRIVATE heat_seq_common Nodes::wrapper)
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
mk_heat_nodes(heat_nodes solver_ompss2.c)
mk_heat_nodes(heat_nodes_residual solver_ompss2_residual.c)
endif() endif()

View File

@ -35,13 +35,16 @@ int main(int argc, char **argv)
int threads = 1; int threads = 1;
#endif #endif
fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s", fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n",
"rows", "cols", "rbs", "cbs", "threads", "time", "updates/s", "error",
"steps", "error", "time", "updates/s\n"); "rows", "cols",
fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n", "rbs", "cbs", "threads",
"steps");
fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n",
end-start, throughput, residual,
conf.rows, conf.cols, conf.rows, conf.cols,
conf.rbs, conf.cbs, threads, conf.rbs, conf.cbs, threads,
conf.convergenceTimesteps, residual, end-start, throughput); conf.convergenceTimesteps);
if (conf.generateImage) if (conf.generateImage)
writeImage(conf.imageFileName, conf.matrix, rows, cols); writeImage(conf.imageFileName, conf.matrix, rows, cols);

View File

@ -1,5 +1,10 @@
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] = "Parallel version using OmpSs-2 tasks";

	return text;
}
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb]) static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
{ {

View File

@ -2,6 +2,12 @@
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using OmpSs-2 tasks and taking into account the\n"
		"residual";

	return text;
}
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual) static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual)
{ {

View File

@ -1,5 +1,10 @@
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	/* The OmpSs-2 construct is called "taskloop" */
	return "Parallel version using OmpSs-2 taskloop";
}
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb]) static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
{ {

View File

@ -1,5 +1,10 @@
#include "common/heat.h" #include "common/heat.h"
/* Short description of this solver, printed by the usage help. */
const char *
summary(void)
{
	static const char text[] = "Sequential solver with one CPU";

	return text;
}
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols]) static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols])
{ {

12
src/ompss2/CMakeLists.txt Normal file
View File

@ -0,0 +1,12 @@
# mk_nanos6(NAME SOURCE)
#
# Register the benchmark NAME through mk_bench(), build it from SOURCE and
# link it against the bench6 helper library and the Nanos6 wrapper.
macro(mk_nanos6 NAME SOURCE)
  mk_bench(${NAME})
  target_sources(${NAME} PRIVATE ${SOURCE})
  target_link_libraries(${NAME} PRIVATE bench6_lib Nanos6::wrapper)
endmacro()

# These benchmarks are only built when Nanos6 is available. They must be
# declared with mk_nanos6(): the mk_bench6() macro no longer exists.
if(NANOS6_FOUND)
  mk_nanos6(b6_nanos6_register_deps register_deps.c)
  mk_nanos6(b6_nanos6_sched_add sched_add.c)
  mk_nanos6(b6_nanos6_sched_get sched_get.c)
  mk_nanos6(b6_nanos6_readywave readywave.c)
endif()

View File

@ -1,6 +1,4 @@
## Bench6: A set of micro-benchmarks for Nanos6 This directory contains a set of microbenchmarks for Nanos6,
This repository contains a set of microbenchmarks for Nanos6,
specifically targeted to expose the limitations of the runtime following specifically targeted to expose the limitations of the runtime following
the breakdown analysis. the breakdown analysis.

View File

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

Before

Width:  |  Height:  |  Size: 118 KiB

After

Width:  |  Height:  |  Size: 118 KiB

View File

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

View File

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

7
src/tools/CMakeLists.txt Normal file
View File

@ -0,0 +1,7 @@
# Read the list of registered benchmarks into BENCH6_LIST so that it can be
# substituted as @BENCH6_LIST@ in config.in.h. This must run before
# configure_file().
get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list)
# Generate config.h from the config.in.h template
configure_file(config.in.h config.h)
# Let the sources of this directory find the generated config.h
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# The runner tool, linked with libm and the bench6 helper library
add_executable(bench6_runner runner.c)
target_link_libraries(bench6_runner PRIVATE m bench6_lib)
install(TARGETS bench6_runner RUNTIME DESTINATION bin)

11
src/tools/config.in.h Normal file
View File

@ -0,0 +1,11 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef CONFIG_H
#define CONFIG_H
/* Installation prefix, substituted by CMake from CMAKE_INSTALL_PREFIX */
#define BENCH6_PREFIX "@CMAKE_INSTALL_PREFIX@"
/* bin directory under the installation prefix */
#define BENCH6_BIN (BENCH6_PREFIX "/bin")
/* Benchmark list, substituted by CMake from the BENCH6_LIST variable */
#define BENCH6_LIST "@BENCH6_LIST@"
#endif /* CONFIG_H */

199
src/tools/runner.c Normal file
View File

@ -0,0 +1,199 @@
#include "common.h"
#include "config.h"
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
//static void
//usage(void)
//{
// exit(1);
//}
/* State of the sampling of one benchmark */
struct sampling {
/* Capacity of the samples array */
int nmax;
/* Minimum number of samples to take before sampling may stop */
int nmin;
/* Number of samples currently stored */
int n;
/* Array of collected samples */
double *samples;
/* Relative standard error (se * 1.96 / mean), updated by stats() */
double rse;
/* Most recently added sample */
double last;
};
/* Run the benchmark named argv[0] once and read the time it reports.
 *
 * The binary is looked up in BENCH6_BIN and executed in a child process
 * with argv as its argument vector, an empty environment, stdout connected
 * to a pipe and stderr closed. The first line of its stdout is parsed as a
 * floating point number and stored in *ptime; the rest is discarded.
 *
 * Returns 0 on success. Returns -1 if the benchmark cannot be found or
 * started, if its first output line is missing or cannot be parsed, or if
 * the child does not exit successfully. *ptime is only written on a
 * successfully parsed line. */
static int
do_run(char *argv[], double *ptime)
{
	if (argv == NULL || argv[0] == NULL) {
		err("missing benchmark name");
		return -1;
	}

	/* Gather binary path, rejecting names that would not fit */
	char path[PATH_MAX];
	int len = snprintf(path, sizeof(path), "%s/%s", BENCH6_BIN, argv[0]);
	if (len < 0 || (size_t) len >= sizeof(path)) {
		err("benchmark path too long: %s", argv[0]);
		return -1;
	}

	if (access(path, R_OK | X_OK) != 0) {
		err("cannot find benchmark %s:", path);
		return -1;
	}

	int pipefd[2];
	if (pipe(pipefd) != 0) {
		err("pipe failed:");
		return -1;
	}

	/* Fork */
	pid_t p = fork();
	if (p < 0) {
		/* Report first: close() may overwrite errno */
		err("fork failed:");
		close(pipefd[0]);
		close(pipefd[1]);
		return -1;
	}

	/* In children execute benchmark */
	if (p == 0) {
		/* Empty environment. A NULL envp is a non-portable Linux
		 * extension, so pass a valid, empty array instead. */
		char *const envp[] = { NULL };

		close(pipefd[0]);
		if (dup2(pipefd[1], STDOUT_FILENO) < 0)
			_exit(127);
		if (pipefd[1] != STDOUT_FILENO)
			close(pipefd[1]);
		close(STDERR_FILENO);

		execve(path, argv, envp);

		/* Only reached if execve failed. The child must never return
		 * into the sampling loop of the parent, and stderr is closed,
		 * so the failure is reported through the exit status. */
		_exit(127);
	}

	/* In the parent read the output of the benchmark */
	close(pipefd[1]);

	int ret = -1;
	FILE *f = fdopen(pipefd[0], "r");
	if (f == NULL) {
		err("fdopen failed:");
		close(pipefd[0]);
	} else {
		char line[4096];

		if (fgets(line, sizeof(line), f) == NULL) {
			err("missing stdout line");
		} else {
			char *nl = strchr(line, '\n');
			if (nl != NULL)
				*nl = '\0';

			double value;
			if (sscanf(line, "%le", &value) != 1) {
				err("cannot parse time from line: %s", line);
			} else {
				*ptime = value;
				ret = 0;
			}
		}

		/* Drain the rest of the stdout */
		while (fgets(line, sizeof(line), f) != NULL) { }

		/* fclose() also closes pipefd[0]; it must not be closed twice */
		fclose(f);
	}

	/* Reap the child so that no zombie is left behind on each run */
	int status = 0;
	if (waitpid(p, &status, 0) < 0) {
		err("waitpid failed:");
		return -1;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		err("benchmark %s did not exit successfully", path);
		return -1;
	}

	return ret;
}
/* Recompute the statistics of the stored samples and print them as a
 * progress line on stderr (starting with '\r', without newline).
 *
 * Nothing is done with fewer than two samples. Otherwise the mean, the
 * standard deviation (dividing by n), the standard error and the relative
 * standard error se * 1.96 / mean are computed, and the latter is stored in
 * s->rse. NOTE(review): 1.96 is presumably the 95% normal quantile. */
static void
stats(struct sampling *s)
{
	int count = s->n;

	if (count < 2)
		return;

	const double *first = s->samples;
	const double *end = first + count;
	double n = count;

	double sum = 0.0;
	for (const double *p = first; p != end; p++)
		sum += *p;

	double mean = sum / n;

	double sumsqr = 0.0;
	for (const double *p = first; p != end; p++) {
		double dev = *p - mean;
		sumsqr += dev * dev;
	}

	double stdev = sqrt(sumsqr / n);
	double se = stdev / sqrt(n);
	double rse = se * 1.96 / mean;

	s->rse = rse;

	fprintf(stderr, "\rn=%d last=%e mean=%e stdev=%e se=%e rse=%e",
			s->n, s->last, mean, stdev, se, rse);
}
/* Refresh the statistics and decide whether another sample is needed.
 *
 * Returns 1 while fewer than s->nmin samples have been taken or while the
 * relative standard error is above 1 %; returns 0 otherwise. */
static int
should_continue(struct sampling *s)
{
	stats(s);

	int too_few = (s->n < s->nmin);
	int too_noisy = (s->rse * 100.0 > 1.0 /* % */);

	return (too_few || too_noisy) ? 1 : 0;
}
/* Append a new sample to the array and remember it as the last one.
 * Calls die() when the array is already full. */
static void
add_sample(struct sampling *s, double time)
{
	int idx = s->n;

	if (idx < s->nmax) {
		s->samples[idx] = time;
		s->last = time;
		s->n = idx + 1;
	} else {
		die("overflowing samples");
	}
}
//static int
//compare_double(const void *a, const void *b)
//{
// double aa = *(const double *) a;
// double bb = *(const double *) b;
//
// if (aa < bb)
// return -1;
// else if (aa > bb)
// return +1;
// else
// return 0;
//}
/* Run the benchmark given by argv repeatedly, collecting one time sample
 * per run, until should_continue() reports that enough samples have been
 * taken or the sample buffer is full.
 *
 * Returns 0 on success and 1 if the buffer cannot be allocated or a run of
 * the benchmark fails. */
static int
sample(char *argv[])
{
	struct sampling s = { 0 };
	s.nmax = 4000;
	s.nmin = 30;
	s.n = 0;

	s.samples = calloc((size_t) s.nmax, sizeof(*s.samples));
	if (s.samples == NULL) {
		err("calloc failed:");
		return 1;
	}

	int ret = 0;

	/* Stop at nmax as well: a benchmark that never reaches the target
	 * error would otherwise overflow the buffer and abort in add_sample() */
	while (s.n < s.nmax && should_continue(&s)) {
		double time;
		if (do_run(argv, &time) != 0) {
			err("failed to run benchmark");
			ret = 1;
			break;
		}

		add_sample(&s, time);
	}

	/* Single exit path, so the buffer is released on failure too */
	free(s.samples);

	return ret;
}
/* Entry point of the runner: samples the benchmark named by the first
 * argument, forwarding the remaining arguments to it.
 *
 * Returns 0 on success and 1 if no benchmark is given or sampling fails. */
int
main(int argc, char *argv[])
{
	/* Without a benchmark name argv[1] is NULL and cannot be executed */
	if (argc < 2) {
		err("missing benchmark, usage: bench6_runner BENCHMARK [ARG]...");
		return 1;
	}

	if (sample(argv+1) != 0) {
		err("failed to sample the benchmark");
		return 1;
	}

	return 0;
}