Reorganize sources
3
.gitignore
vendored
@ -1 +1,4 @@
|
||||
data
|
||||
build/
|
||||
install/
|
||||
tags
|
||||
|
@ -18,9 +18,6 @@ add_compile_options(
|
||||
-Werror
|
||||
)
|
||||
|
||||
set(CMAKE_C_COMPILER "clang")
|
||||
set(CMAKE_CXX_COMPILER "clang++")
|
||||
|
||||
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
@ -76,6 +73,23 @@ find_package(MPI)
|
||||
find_package(Nanos6)
|
||||
find_package(Nodes)
|
||||
|
||||
set_property(GLOBAL PROPERTY bench6_list "")
|
||||
|
||||
# mk_bench(NAME): declare the benchmark executable NAME, append it to the
# global bench6_list property and install it into bin.
#
# NAME must start with "b6_". The target is created without sources, so the
# caller has to attach them afterwards (for example with target_sources()).
macro(mk_bench NAME)
  # if(MATCHES) succeeds when the pattern matches anywhere in the string, so
  # the pattern is anchored to really enforce the "begins with b6_" rule.
  if(NOT "${NAME}" MATCHES "^b6_")
    message(FATAL_ERROR "benchmark name must begin with b6_: ${NAME}")
  endif()
  add_executable(${NAME})

  # The list lives in a global property because this macro is invoked from
  # several subdirectories, each with its own variable scope.
  get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list)
  message(STATUS "Before BENCH6_LIST=${BENCH6_LIST}")
  list(APPEND BENCH6_LIST ${NAME})
  message(STATUS "After BENCH6_LIST=${BENCH6_LIST}")
  set_property(GLOBAL PROPERTY bench6_list "${BENCH6_LIST}")

  install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
include(FeatureSummary)
|
||||
|
@ -1,2 +1,4 @@
|
||||
add_subdirectory(bench6)
|
||||
add_subdirectory(ompss2)
|
||||
add_subdirectory(heat)
|
||||
add_subdirectory(tools)
|
||||
|
@ -1,13 +1,2 @@
|
||||
add_library(bench6_common STATIC common.c)
|
||||
|
||||
macro(mk_bench6 NAME SOURCE)
|
||||
add_executable(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE bench6_common Nanos6::wrapper)
|
||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
||||
endmacro()
|
||||
|
||||
# FIXME: broken in last clang
|
||||
#mk_bench6(bench6_register_deps register_deps.c)
|
||||
mk_bench6(bench6_sched_add sched_add.c)
|
||||
mk_bench6(bench6_sched_get sched_get.c)
|
||||
mk_bench6(bench6_readywave readywave.c)
|
||||
add_library(bench6_lib STATIC bench6.c common.c)
|
||||
target_include_directories(bench6_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
35
src/bench6/bench6.c
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "bench6.h"
|
||||
|
||||
//#include <nanos6/debug.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Returns the current time in seconds since some point in the past */
|
||||
double bench6_time(void)
|
||||
{
|
||||
struct timespec tv;
|
||||
if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0)
|
||||
{
|
||||
perror("clock_gettime failed");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return (double)(tv.tv_sec) +
|
||||
(double)tv.tv_nsec * 1.0e-9;
|
||||
}
|
||||
|
||||
/* Stub: the Nanos6 query (nanos6_get_num_cpus) is disabled in this file, so
 * the number of CPUs is always reported as -1. */
int get_ncpus(void)
{
	const int unknown = -1;

	return unknown;
}
|
||||
|
||||
/* Prints the given time on stdout as a single "time <value>" line, with the
 * value in exponent notation. */
void
bench6_report(double time)
{
	fprintf(stdout, "time %e\n", time);
}
|
@ -4,14 +4,9 @@
|
||||
#ifndef BENCH6_H
|
||||
#define BENCH6_H
|
||||
|
||||
#define UNUSED(x) (void)(x)
|
||||
double bench6_time(void);
|
||||
void bench6_report(double time);
|
||||
|
||||
double get_time(void);
|
||||
int get_ncpus(void);
|
||||
|
||||
int bench6_creator(int argc, char *argv[]);
|
||||
int bench6_sched_get(int argc, char *argv[]);
|
||||
int bench6_sched_add(int argc, char *argv[]);
|
||||
int bench6_register_deps(int argc, char *argv[]);
|
||||
|
||||
#endif /* BENCH6_H */
|
||||
|
@ -1,28 +1,69 @@
|
||||
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
|
||||
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "bench6.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <nanos6/debug.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Returns the current time in seconds since some point in the past */
|
||||
double get_time(void)
|
||||
char *progname = NULL;
|
||||
int is_debug_enabled = 0;
|
||||
|
||||
void
|
||||
progname_set(char *name)
|
||||
{
|
||||
struct timespec tv;
|
||||
if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0)
|
||||
{
|
||||
perror("clock_gettime failed");
|
||||
exit(EXIT_FAILURE);
|
||||
progname = name;
|
||||
}
|
||||
|
||||
return (double)(tv.tv_sec) +
|
||||
(double)tv.tv_nsec * 1.0e-9;
|
||||
void
|
||||
enable_debug(void)
|
||||
{
|
||||
is_debug_enabled = 1;
|
||||
}
|
||||
|
||||
int get_ncpus(void)
|
||||
static void
|
||||
vaerr(const char *prefix, const char *func, const char *errstr, va_list ap)
|
||||
{
|
||||
return (int) nanos6_get_num_cpus();
|
||||
if (progname != NULL)
|
||||
fprintf(stderr, "%s: ", progname);
|
||||
|
||||
if (prefix != NULL)
|
||||
fprintf(stderr, "%s: ", prefix);
|
||||
|
||||
if (func != NULL)
|
||||
fprintf(stderr, "%s: ", func);
|
||||
|
||||
vfprintf(stderr, errstr, ap);
|
||||
|
||||
int len = strlen(errstr);
|
||||
|
||||
if (len > 0) {
|
||||
char last = errstr[len - 1];
|
||||
if (last == ':')
|
||||
fprintf(stderr, " %s\n", strerror(errno));
|
||||
else if (last != '\n' && last != '\r')
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Variadic front end of vaerr(): collects the arguments that follow errstr
 * into a va_list and forwards them together with prefix and func. */
void
verr(const char *prefix, const char *func, const char *errstr, ...)
{
	va_list args;

	va_start(args, errstr);
	vaerr(prefix, func, errstr, args);
	va_end(args);
}
|
||||
|
||||
/* Same as verr(), but terminates the process with abort() once the message
 * has been written, so it never returns to the caller. */
void
vdie(const char *prefix, const char *func, const char *errstr, ...)
{
	va_list args;

	va_start(args, errstr);
	vaerr(prefix, func, errstr, args);
	va_end(args);

	abort();
}
|
||||
|
46
src/bench6/common.h
Normal file
@ -0,0 +1,46 @@
|
||||
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
extern int is_debug_enabled;
|
||||
|
||||
/* Debug macros */
|
||||
|
||||
void progname_set(char *name);
|
||||
void enable_debug(void);
|
||||
void verr(const char *prefix, const char *func, const char *errstr, ...);
|
||||
void vdie(const char *prefix, const char *func, const char *errstr, ...);
|
||||
|
||||
/* clang-format off */
|
||||
|
||||
#define rerr(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define err(...) verr("ERROR", __func__, __VA_ARGS__)
|
||||
#define die(...) vdie("FATAL", __func__, __VA_ARGS__)
|
||||
#define info(...) verr("INFO", NULL, __VA_ARGS__)
|
||||
#define finfo(...) verr("INFO", __func__, __VA_ARGS__)
|
||||
#define warn(...) verr("WARN", NULL, __VA_ARGS__)
|
||||
|
||||
/* Emits a DEBUG message tagged with the calling function, but only when
 * is_debug_enabled is non-zero. The do/while (0) wrapper intentionally has no
 * trailing semicolon: the caller's own ';' completes the statement, so
 * "dbg(...);" is exactly one statement and can be used in an unbraced
 * if/else. */
#define dbg(...) do { \
	if (unlikely(is_debug_enabled)) verr("DEBUG", __func__, __VA_ARGS__); \
} while (0)
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#define UNUSED(x) (void)(x)
|
||||
|
||||
/* Poison assert */
|
||||
#pragma GCC poison assert
|
||||
|
||||
#define USE_RET __attribute__((warn_unused_result))
|
||||
|
||||
#define ARRAYLEN(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
||||
/* clang-format on */
|
||||
|
||||
|
||||
|
||||
#endif /* COMMON_H */
|
@ -1,5 +1,6 @@
|
||||
add_library(heat_common STATIC common/misc.c common/kernel.c)
|
||||
target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(heat_common PUBLIC m)
|
||||
|
||||
add_subdirectory(smp)
|
||||
add_subdirectory(mpi)
|
||||
|
@ -3,11 +3,11 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define IGNORE_RESIDUAL ((double) -1.0)
|
||||
#define DEFAULT_DELTA ((double) 0.00005)
|
||||
#define DEFAULT_BS 1024
|
||||
#define MAX_STRING_SIZE 100
|
||||
|
||||
#define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b))
|
||||
|
||||
@ -30,8 +30,8 @@ typedef struct {
|
||||
double *matrix;
|
||||
int numHeatSources;
|
||||
HeatSource *heatSources;
|
||||
char confFileName[MAX_STRING_SIZE];
|
||||
char imageFileName[MAX_STRING_SIZE];
|
||||
char confFileName[PATH_MAX];
|
||||
char imageFileName[PATH_MAX];
|
||||
bool generateImage;
|
||||
bool warmup;
|
||||
bool verbose;
|
||||
@ -47,6 +47,7 @@ void printConfiguration(const HeatConfiguration *conf);
|
||||
void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset);
|
||||
double getTime(void);
|
||||
|
||||
const char *summary(void);
|
||||
double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData);
|
||||
void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
||||
double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
||||
|
52
src/heat/common/main.c
Normal file
@ -0,0 +1,52 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common/heat.h"
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
HeatConfiguration conf;
|
||||
readConfiguration(argc, argv, &conf);
|
||||
refineConfiguration(&conf, conf.rbs, conf.cbs);
|
||||
if (conf.verbose)
|
||||
printConfiguration(&conf);
|
||||
|
||||
int64_t rows = conf.rows+2;
|
||||
int64_t cols = conf.cols+2;
|
||||
|
||||
initialize(&conf, rows, cols, 0);
|
||||
|
||||
if (conf.warmup)
|
||||
solve(&conf, rows, cols, 1, NULL);
|
||||
|
||||
// Solve the problem
|
||||
double start = getTime();
|
||||
double residual = solve(&conf, rows, cols, conf.timesteps, NULL);
|
||||
double end = getTime();
|
||||
|
||||
int64_t totalElements = conf.rows*conf.cols;
|
||||
double throughput = (totalElements*conf.timesteps)/(end-start);
|
||||
|
||||
#ifdef _OMPSS_2
|
||||
int threads = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#else
|
||||
int threads = 1;
|
||||
#endif
|
||||
|
||||
fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s",
|
||||
"rows", "cols", "rbs", "cbs", "threads",
|
||||
"steps", "error", "time", "updates/s\n");
|
||||
fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n",
|
||||
conf.rows, conf.cols,
|
||||
conf.rbs, conf.cbs, threads,
|
||||
conf.convergenceTimesteps, residual, end-start, throughput);
|
||||
|
||||
if (conf.generateImage)
|
||||
writeImage(conf.imageFileName, conf.matrix, rows, cols);
|
||||
|
||||
finalize(&conf);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,11 +1,11 @@
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <getopt.h>
|
||||
#include <libgen.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
@ -105,7 +105,11 @@ static void printUsage(int argc, char **argv)
|
||||
{
|
||||
(void) argc;
|
||||
|
||||
fprintf(stdout, "Usage: %s [OPTION]...\n", argv[0]);
|
||||
const char *prog = basename(argv[0]);
|
||||
fprintf(stdout, "%s - %s\n", prog, summary());
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "Usage: %s [OPTION]...\n", prog);
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "Parameters:\n");
|
||||
fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n");
|
||||
fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n");
|
||||
@ -176,7 +180,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
||||
conf->verbose = true;
|
||||
break;
|
||||
case 'f':
|
||||
if (strlen(optarg) >= MAX_STRING_SIZE) {
|
||||
if (strlen(optarg) >= PATH_MAX) {
|
||||
fprintf(stderr, "Error: Configuration name is too long!\n");
|
||||
exit(1);
|
||||
}
|
||||
@ -185,7 +189,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
||||
case 'o':
|
||||
conf->generateImage = true;
|
||||
conf->warmup = false;
|
||||
if (strlen(optarg) >= MAX_STRING_SIZE) {
|
||||
if (strlen(optarg) >= PATH_MAX) {
|
||||
fprintf(stderr, "Error: Image name is too long!\n");
|
||||
exit(1);
|
||||
}
|
||||
@ -263,8 +267,8 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
||||
|
||||
static void readSourcesFile(HeatConfiguration *conf, FILE *file)
|
||||
{
|
||||
char line[MAX_STRING_SIZE];
|
||||
if (!fgets(line, MAX_STRING_SIZE, file)) {
|
||||
char line[4096];
|
||||
if (!fgets(line, 4096, file)) {
|
||||
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
||||
exit(1);
|
||||
}
|
||||
@ -283,7 +287,7 @@ static void readSourcesFile(HeatConfiguration *conf, FILE *file)
|
||||
assert(conf->heatSources != NULL);
|
||||
|
||||
for (int i = 0; i < conf->numHeatSources; i++) {
|
||||
if (!fgets(line, MAX_STRING_SIZE, file)) {
|
||||
if (!fgets(line, 4096, file)) {
|
||||
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
@ -2,34 +2,36 @@ if(NOT MPI_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_library(heat_mpi_common STATIC main.c utils.c)
|
||||
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
|
||||
|
||||
macro(mk_heat_mpi NAME SOURCE)
|
||||
add_executable(${NAME} ${SOURCE})
|
||||
mk_bench(${NAME})
|
||||
target_sources(${NAME} PRIVATE ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE heat_mpi_common)
|
||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
||||
endmacro()
|
||||
|
||||
mk_heat_mpi(heat_mpi solver_mpi.c)
|
||||
mk_heat_mpi(heat_mpi_nbuffer solver_mpi_nbuffer.c)
|
||||
|
||||
if(NANOS6_FOUND)
|
||||
macro(mk_heat_mpi_nanos6 NAME SOURCE)
|
||||
mk_heat_mpi(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
|
||||
endmacro()
|
||||
|
||||
mk_heat_mpi_nanos6(heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||
mk_heat_mpi_nanos6(heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
|
||||
endif()
|
||||
|
||||
if(NODES_FOUND)
|
||||
macro(mk_heat_mpi_nodes NAME SOURCE)
|
||||
mk_heat_mpi(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
|
||||
endmacro()
|
||||
|
||||
mk_heat_mpi_nodes(heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||
mk_heat_mpi_nodes(heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
add_library(heat_mpi_common STATIC main.c utils.c)
|
||||
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
|
||||
|
||||
mk_heat_mpi(b6_heat_mpi solver_mpi.c)
|
||||
mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c)
|
||||
|
||||
if(NANOS6_FOUND)
|
||||
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
|
||||
endif()
|
||||
|
||||
if(NODES_FOUND)
|
||||
mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||
mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
|
||||
endif()
|
||||
|
@ -3,6 +3,11 @@
|
||||
#include "utils.h"
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI and blocking primitives";

	return text;
}
|
||||
|
||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||
{
|
||||
|
@ -8,6 +8,13 @@ typedef struct {
|
||||
MPI_Request recv;
|
||||
} HaloRequests;
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. The text spans two
 * lines. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI and non-blocking primitives with\n"
		"overlap of computation and communication phases";

	return text;
}
|
||||
|
||||
static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs)
|
||||
{
|
||||
MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send);
|
||||
|
@ -3,6 +3,12 @@
|
||||
#include "utils.h"
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. The text spans two
 * lines. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI + OmpSs-2 following a fork-join\n"
		"parallelization.";

	return text;
}
|
||||
|
||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||
{
|
||||
|
@ -3,9 +3,15 @@
|
||||
#include "utils.h"
|
||||
#include "common/heat.h"
|
||||
|
||||
|
||||
static int serial;
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. The text spans two
 * lines. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using MPI + OmpSs-2 tasks where communication\n"
		"tasks are serialized";

	return text;
}
|
||||
|
||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||
{
|
||||
MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD);
|
||||
|
@ -1,25 +1,32 @@
|
||||
add_library(heat_seq_common STATIC main.c)
|
||||
target_link_libraries(heat_seq_common PUBLIC heat_common)
|
||||
macro(mk_heat_smp NAME SOURCE)
|
||||
mk_bench(${NAME})
|
||||
target_sources(${NAME} PRIVATE ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE heat_smp_common)
|
||||
endmacro()
|
||||
|
||||
add_executable(heat_seq solver_seq.c)
|
||||
target_link_libraries(heat_seq PUBLIC heat_seq_common)
|
||||
macro(mk_heat_nanos6 NAME SOURCE)
|
||||
mk_heat_smp(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
|
||||
endmacro()
|
||||
|
||||
macro(mk_heat_nodes NAME SOURCE)
|
||||
mk_heat_smp(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
|
||||
endmacro()
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
add_library(heat_smp_common STATIC main.c)
|
||||
target_link_libraries(heat_smp_common PUBLIC heat_common)
|
||||
|
||||
mk_heat_smp(b6_heat_seq solver_seq.c)
|
||||
|
||||
if(NANOS6_FOUND)
|
||||
macro(mk_heat_nanos6 NAME SOURCE)
|
||||
add_executable(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE heat_seq_common Nanos6::wrapper)
|
||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
||||
endmacro()
|
||||
mk_heat_nanos6(heat_nanos6 solver_ompss2.c)
|
||||
mk_heat_nanos6(heat_nanos6_residual solver_ompss2_residual.c)
|
||||
mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c)
|
||||
mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c)
|
||||
endif()
|
||||
|
||||
if(NODES_FOUND)
|
||||
macro(mk_heat_nodes NAME SOURCE)
|
||||
add_executable(${NAME} ${SOURCE})
|
||||
target_link_libraries(${NAME} PRIVATE heat_seq_common Nodes::wrapper)
|
||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
||||
endmacro()
|
||||
mk_heat_nodes(heat_nodes solver_ompss2.c)
|
||||
mk_heat_nodes(heat_nodes_residual solver_ompss2_residual.c)
|
||||
mk_heat_nodes(b6_heat_nodes solver_ompss2.c)
|
||||
mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c)
|
||||
endif()
|
||||
|
@ -35,13 +35,16 @@ int main(int argc, char **argv)
|
||||
int threads = 1;
|
||||
#endif
|
||||
|
||||
fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s",
|
||||
"rows", "cols", "rbs", "cbs", "threads",
|
||||
"steps", "error", "time", "updates/s\n");
|
||||
fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n",
|
||||
fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n",
|
||||
"time", "updates/s", "error",
|
||||
"rows", "cols",
|
||||
"rbs", "cbs", "threads",
|
||||
"steps");
|
||||
fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n",
|
||||
end-start, throughput, residual,
|
||||
conf.rows, conf.cols,
|
||||
conf.rbs, conf.cbs, threads,
|
||||
conf.convergenceTimesteps, residual, end-start, throughput);
|
||||
conf.convergenceTimesteps);
|
||||
|
||||
if (conf.generateImage)
|
||||
writeImage(conf.imageFileName, conf.matrix, rows, cols);
|
||||
|
@ -1,5 +1,10 @@
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. */
const char *
summary(void)
{
	static const char text[] = "Parallel version using OmpSs-2 tasks";

	return text;
}
|
||||
|
||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
||||
{
|
||||
|
@ -2,6 +2,12 @@
|
||||
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. The text spans two
 * lines. */
const char *
summary(void)
{
	static const char text[] =
		"Parallel version using OmpSs-2 tasks and taking into account the\n"
		"residual";

	return text;
}
|
||||
|
||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual)
|
||||
{
|
||||
|
@ -1,5 +1,10 @@
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. */
const char *
summary(void)
{
	return "Parallel version using OmpSs-2 taskloop";
}
|
||||
|
||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
||||
{
|
||||
|
@ -1,5 +1,10 @@
|
||||
#include "common/heat.h"
|
||||
|
||||
/* Returns a constant, human-readable description of this solver variant;
 * printUsage() prints it next to the program name. */
const char *
summary(void)
{
	static const char text[] = "Sequential solver with one CPU";

	return text;
}
|
||||
|
||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols])
|
||||
{
|
||||
|
12
src/ompss2/CMakeLists.txt
Normal file
@ -0,0 +1,12 @@
|
||||
# mk_nanos6(NAME SOURCE): declare the Nanos6 micro-benchmark NAME through
# mk_bench(), build it from SOURCE and link it against Nanos6::wrapper.
macro(mk_nanos6 NAME SOURCE)
  mk_bench(${NAME})
  target_sources(${NAME} PRIVATE ${SOURCE})
  target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
endmacro()

# The benchmarks are only built when Nanos6 was found. They are registered
# with mk_nanos6(), the macro defined in this file.
# NOTE(review): if these sources use the helpers from src/bench6 (bench6.h,
# common.h) they also need to link bench6_lib -- confirm.
# NOTE(review): register_deps.c was previously disabled with "FIXME: broken in
# last clang" -- confirm it builds before keeping it enabled.
if(NANOS6_FOUND)
  mk_nanos6(b6_nanos6_register_deps register_deps.c)
  mk_nanos6(b6_nanos6_sched_add sched_add.c)
  mk_nanos6(b6_nanos6_sched_get sched_get.c)
  mk_nanos6(b6_nanos6_readywave readywave.c)
endif()
|
@ -1,6 +1,4 @@
|
||||
## Bench6: A set of micro-benchmarks for Nanos6
|
||||
|
||||
This repository contains a set of microbenchmarks for Nanos6,
|
||||
This directory contains a set of microbenchmarks for Nanos6,
|
||||
specifically targeted at exposing the limitations of the runtime following
|
||||
the breakdown analysis.
|
||||
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 118 KiB After Width: | Height: | Size: 118 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB |
7
src/tools/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list)
|
||||
configure_file(config.in.h config.h)
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
add_executable(bench6_runner runner.c)
|
||||
target_link_libraries(bench6_runner PRIVATE m bench6_lib)
|
||||
install(TARGETS bench6_runner RUNTIME DESTINATION bin)
|
11
src/tools/config.in.h
Normal file
@ -0,0 +1,11 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#define BENCH6_PREFIX "@CMAKE_INSTALL_PREFIX@"
|
||||
#define BENCH6_BIN (BENCH6_PREFIX "/bin")
|
||||
#define BENCH6_LIST "@BENCH6_LIST@"
|
||||
|
||||
#endif /* CONFIG_H */
|
199
src/tools/runner.c
Normal file
@ -0,0 +1,199 @@
|
||||
#include "common.h"
|
||||
#include "config.h"
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//static void
|
||||
//usage(void)
|
||||
//{
|
||||
// exit(1);
|
||||
//}
|
||||
|
||||
/* State of one sampling session: the samples collected so far plus the
 * limits and statistics that drive the decision to stop. */
struct sampling {
|
||||
/* Capacity of samples[]; add_sample() dies once n reaches it */
int nmax;
|
||||
/* Minimum number of samples taken before should_continue() may stop */
int nmin;
|
||||
/* Number of samples currently stored */
int n;
|
||||
/* Measured times (nmax entries, allocated in sample()) */
double *samples;
|
||||
/* Relative error computed by stats(): 1.96 * standard error / mean */
double rse;
|
||||
/* Most recent value passed to add_sample() */
double last;
|
||||
};
|
||||
|
||||
static int
|
||||
do_run(char *argv[], double *ptime)
|
||||
{
|
||||
/* Gather binary path */
|
||||
char path[PATH_MAX];
|
||||
sprintf(path, "%s/%s", BENCH6_BIN, argv[0]);
|
||||
|
||||
if (access(path, R_OK | X_OK) != 0) {
|
||||
err("cannot find benchmark %s:", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int pipefd[2];
|
||||
if (pipe(pipefd) != 0) {
|
||||
err("pipe failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Fork */
|
||||
pid_t p = fork();
|
||||
|
||||
if (p < 0) {
|
||||
err("fork failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* In children execute benchmark */
|
||||
if (p == 0) {
|
||||
close(pipefd[0]);
|
||||
dup2(pipefd[1], 1);
|
||||
close(2);
|
||||
if (execve(path, argv, NULL) != 0) {
|
||||
err("execve failed:");
|
||||
return -1;
|
||||
}
|
||||
/* Not reached */
|
||||
} else {
|
||||
close(pipefd[1]);
|
||||
char line[4096];
|
||||
FILE *f = fdopen(pipefd[0], "r");
|
||||
if (f == NULL) {
|
||||
err("fdopen failed:");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (fgets(line, 4096, f) == NULL) {
|
||||
err("missing stdout line");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *nl = strchr(line, '\n');
|
||||
if (nl != NULL)
|
||||
*nl = '\0';
|
||||
|
||||
double time;
|
||||
sscanf(line, "%le", &time);
|
||||
//printf("got %e\n", time);
|
||||
*ptime = time;
|
||||
|
||||
/* Drain the rest of the stdout */
|
||||
while (fgets(line, 4096, f) != NULL) { }
|
||||
fclose(f);
|
||||
close(pipefd[0]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
stats(struct sampling *s)
|
||||
{
|
||||
if (s->n < 2)
|
||||
return;
|
||||
|
||||
double n = s->n;
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < s->n; i++)
|
||||
sum += s->samples[i];
|
||||
|
||||
double mean = sum / n;
|
||||
double sumsqr = 0.0;
|
||||
for (int i = 0; i < s->n; i++) {
|
||||
double dev = s->samples[i] - mean;
|
||||
sumsqr += dev * dev;
|
||||
}
|
||||
|
||||
double var = sumsqr / n;
|
||||
double stdev = sqrt(var);
|
||||
double se = stdev / sqrt(n);
|
||||
double rse = se * 1.96 / mean;
|
||||
|
||||
fprintf(stderr, "\rn=%d last=%e mean=%e stdev=%e se=%e rse=%e",
|
||||
s->n, s->last, mean, stdev, se, rse);
|
||||
|
||||
s->rse = rse;
|
||||
}
|
||||
|
||||
static int
|
||||
should_continue(struct sampling *s)
|
||||
{
|
||||
stats(s);
|
||||
|
||||
if (s->n < s->nmin)
|
||||
return 1;
|
||||
|
||||
if (s->rse * 100.0 > 1.0 /* % */)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
add_sample(struct sampling *s, double time)
|
||||
{
|
||||
if (s->n >= s->nmax) {
|
||||
die("overflowing samples");
|
||||
} else {
|
||||
s->samples[s->n] = time;
|
||||
s->n++;
|
||||
s->last = time;
|
||||
}
|
||||
}
|
||||
|
||||
//static int
|
||||
//compare_double(const void *a, const void *b)
|
||||
//{
|
||||
// double aa = *(const double *) a;
|
||||
// double bb = *(const double *) b;
|
||||
//
|
||||
// if (aa < bb)
|
||||
// return -1;
|
||||
// else if (aa > bb)
|
||||
// return +1;
|
||||
// else
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
static int
|
||||
sample(char *argv[])
|
||||
{
|
||||
struct sampling s = { 0 };
|
||||
s.nmax = 4000;
|
||||
s.nmin = 30;
|
||||
s.samples = calloc(s.nmax, sizeof(double));
|
||||
s.n = 0;
|
||||
|
||||
while (should_continue(&s)) {
|
||||
double time;
|
||||
if (do_run(argv, &time) != 0) {
|
||||
err("failed to run benchmark");
|
||||
return 1;
|
||||
}
|
||||
|
||||
add_sample(&s, time);
|
||||
}
|
||||
|
||||
free(s.samples);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Entry point of the runner: samples the benchmark named by the first
 * argument, passing the remaining arguments on to it.
 *
 * Returns 0 on success and 1 when no benchmark is given or the sampling
 * fails. */
int
main(int argc, char *argv[])
{
	/* Without arguments argv+1 points at the terminating NULL entry, which
	 * must not be handed on as a benchmark name. */
	if (argc < 2) {
		rerr("usage: %s BENCHMARK [ARGS...]\n",
				argc > 0 ? argv[0] : "bench6_runner");
		return 1;
	}

	if (sample(argv+1) != 0) {
		err("failed to sample the benchmark");
		return 1;
	}

	return 0;
}
|