Reorganize sources
3
.gitignore
vendored
@ -1 +1,4 @@
|
|||||||
data
|
data
|
||||||
|
build/
|
||||||
|
install/
|
||||||
|
tags
|
||||||
|
@ -18,9 +18,6 @@ add_compile_options(
|
|||||||
-Werror
|
-Werror
|
||||||
)
|
)
|
||||||
|
|
||||||
set(CMAKE_C_COMPILER "clang")
|
|
||||||
set(CMAKE_CXX_COMPILER "clang++")
|
|
||||||
|
|
||||||
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||||
|
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
@ -76,6 +73,23 @@ find_package(MPI)
|
|||||||
find_package(Nanos6)
|
find_package(Nanos6)
|
||||||
find_package(Nodes)
|
find_package(Nodes)
|
||||||
|
|
||||||
|
set_property(GLOBAL PROPERTY bench6_list "")
|
||||||
|
|
||||||
|
# mk_bench(NAME): declares the benchmark executable NAME (the caller attaches
# the sources afterwards with target_sources), appends NAME to the global
# bench6_list property and installs the binary under bin/.
# NAME must start with the "b6_" prefix.
macro(mk_bench NAME)
  # Anchor the pattern: an unanchored MATCHES accepts "b6_" anywhere in NAME,
  # which contradicts the error message below.
  if(NOT "${NAME}" MATCHES "^b6_")
    message(FATAL_ERROR "benchmark name must begin with b6_: ${NAME}")
  endif()
  add_executable(${NAME})

  # Global properties are used because this macro is called from several
  # subdirectories, each with its own variable scope.
  get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list)
  message(STATUS "Before BENCH6_LIST=${BENCH6_LIST}")
  list(APPEND BENCH6_LIST ${NAME})
  message(STATUS "After BENCH6_LIST=${BENCH6_LIST}")
  set_property(GLOBAL PROPERTY bench6_list "${BENCH6_LIST}")

  install(TARGETS ${NAME} RUNTIME DESTINATION bin)
endmacro()
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
|
||||||
include(FeatureSummary)
|
include(FeatureSummary)
|
||||||
|
@ -1,2 +1,4 @@
|
|||||||
add_subdirectory(bench6)
|
add_subdirectory(bench6)
|
||||||
|
add_subdirectory(ompss2)
|
||||||
add_subdirectory(heat)
|
add_subdirectory(heat)
|
||||||
|
add_subdirectory(tools)
|
||||||
|
@ -1,13 +1,2 @@
|
|||||||
add_library(bench6_common STATIC common.c)
|
add_library(bench6_lib STATIC bench6.c common.c)
|
||||||
|
target_include_directories(bench6_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
macro(mk_bench6 NAME SOURCE)
|
|
||||||
add_executable(${NAME} ${SOURCE})
|
|
||||||
target_link_libraries(${NAME} PRIVATE bench6_common Nanos6::wrapper)
|
|
||||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
# FIXME: broken in last clang
|
|
||||||
#mk_bench6(bench6_register_deps register_deps.c)
|
|
||||||
mk_bench6(bench6_sched_add sched_add.c)
|
|
||||||
mk_bench6(bench6_sched_get sched_get.c)
|
|
||||||
mk_bench6(bench6_readywave readywave.c)
|
|
||||||
|
35
src/bench6/bench6.c
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||||
|
|
||||||
|
#include "bench6.h"
|
||||||
|
|
||||||
|
//#include <nanos6/debug.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/*
 * Returns the current CLOCK_MONOTONIC reading as a double, in seconds.
 * Prints a diagnostic and terminates the process if the clock cannot be read.
 */
double bench6_time(void)
{
	struct timespec now;

	if (clock_gettime(CLOCK_MONOTONIC, &now) == 0) {
		double whole = (double)(now.tv_sec);
		double frac = (double)now.tv_nsec * 1.0e-9;

		return whole + frac;
	}

	perror("clock_gettime failed");
	exit(EXIT_FAILURE);
}
|
||||||
|
|
||||||
|
/*
 * Placeholder: always returns -1. The Nanos6 query that used to back this
 * function, (int) nanos6_get_num_cpus(), is disabled in this file.
 */
int get_ncpus(void)
{
	enum { NCPUS_UNKNOWN = -1 };

	return NCPUS_UNKNOWN;
}
|
||||||
|
|
||||||
|
/* Writes the given time value to stdout as a single "time <value>" line,
 * with the value in %e notation. */
void
bench6_report(double time)
{
	fprintf(stdout, "time %e\n", time);
}
|
@ -4,14 +4,9 @@
|
|||||||
#ifndef BENCH6_H
|
#ifndef BENCH6_H
|
||||||
#define BENCH6_H
|
#define BENCH6_H
|
||||||
|
|
||||||
#define UNUSED(x) (void)(x)
|
double bench6_time(void);
|
||||||
|
void bench6_report(double time);
|
||||||
|
|
||||||
double get_time(void);
|
|
||||||
int get_ncpus(void);
|
int get_ncpus(void);
|
||||||
|
|
||||||
int bench6_creator(int argc, char *argv[]);
|
|
||||||
int bench6_sched_get(int argc, char *argv[]);
|
|
||||||
int bench6_sched_add(int argc, char *argv[]);
|
|
||||||
int bench6_register_deps(int argc, char *argv[]);
|
|
||||||
|
|
||||||
#endif /* BENCH6_H */
|
#endif /* BENCH6_H */
|
||||||
|
@ -1,28 +1,69 @@
|
|||||||
/* Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
|
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||||
|
|
||||||
#include "bench6.h"
|
#include "common.h"
|
||||||
|
|
||||||
#include <nanos6/debug.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
/* Returns the current time in seconds since some point in the past */
|
char *progname = NULL;
|
||||||
double get_time(void)
|
int is_debug_enabled = 0;
|
||||||
|
|
||||||
|
void
|
||||||
|
progname_set(char *name)
|
||||||
{
|
{
|
||||||
struct timespec tv;
|
progname = name;
|
||||||
if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0)
|
|
||||||
{
|
|
||||||
perror("clock_gettime failed");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (double)(tv.tv_sec) +
|
void
|
||||||
(double)tv.tv_nsec * 1.0e-9;
|
enable_debug(void)
|
||||||
|
{
|
||||||
|
is_debug_enabled = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_ncpus(void)
|
static void
|
||||||
|
vaerr(const char *prefix, const char *func, const char *errstr, va_list ap)
|
||||||
{
|
{
|
||||||
return (int) nanos6_get_num_cpus();
|
if (progname != NULL)
|
||||||
|
fprintf(stderr, "%s: ", progname);
|
||||||
|
|
||||||
|
if (prefix != NULL)
|
||||||
|
fprintf(stderr, "%s: ", prefix);
|
||||||
|
|
||||||
|
if (func != NULL)
|
||||||
|
fprintf(stderr, "%s: ", func);
|
||||||
|
|
||||||
|
vfprintf(stderr, errstr, ap);
|
||||||
|
|
||||||
|
int len = strlen(errstr);
|
||||||
|
|
||||||
|
if (len > 0) {
|
||||||
|
char last = errstr[len - 1];
|
||||||
|
if (last == ':')
|
||||||
|
fprintf(stderr, " %s\n", strerror(errno));
|
||||||
|
else if (last != '\n' && last != '\r')
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Formats a printf-style message to stderr via vaerr(), which emits the
 * program name (when set), then prefix and func (when not NULL) before the
 * message itself. Returns normally to the caller.
 */
void
verr(const char *prefix, const char *func, const char *errstr, ...)
{
	va_list args;

	va_start(args, errstr);
	vaerr(prefix, func, errstr, args);
	va_end(args);
}
|
||||||
|
|
||||||
|
void
|
||||||
|
vdie(const char *prefix, const char *func, const char *errstr, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, errstr);
|
||||||
|
vaerr(prefix, func, errstr, ap);
|
||||||
|
va_end(ap);
|
||||||
|
abort();
|
||||||
}
|
}
|
||||||
|
46
src/bench6/common.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||||
|
|
||||||
|
#ifndef COMMON_H
|
||||||
|
#define COMMON_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
extern int is_debug_enabled;
|
||||||
|
|
||||||
|
/* Debug macros */
|
||||||
|
|
||||||
|
void progname_set(char *name);
|
||||||
|
void enable_debug(void);
|
||||||
|
void verr(const char *prefix, const char *func, const char *errstr, ...);
|
||||||
|
void vdie(const char *prefix, const char *func, const char *errstr, ...);
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
|
||||||
|
#define rerr(...) fprintf(stderr, __VA_ARGS__)
|
||||||
|
#define err(...) verr("ERROR", __func__, __VA_ARGS__)
|
||||||
|
#define die(...) vdie("FATAL", __func__, __VA_ARGS__)
|
||||||
|
#define info(...) verr("INFO", NULL, __VA_ARGS__)
|
||||||
|
#define finfo(...) verr("INFO", __func__, __VA_ARGS__)
|
||||||
|
#define warn(...) verr("WARN", NULL, __VA_ARGS__)
|
||||||
|
|
||||||
|
/* Emits a DEBUG message through verr() only when is_debug_enabled is set.
 * The expansion deliberately has no trailing semicolon, so that
 * `if (x) dbg(...); else ...` parses as a single statement. */
#define dbg(...) do { \
	if (unlikely(is_debug_enabled)) \
		verr("DEBUG", __func__, __VA_ARGS__); \
} while (0)
|
||||||
|
|
||||||
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||||||
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||||
|
#define UNUSED(x) (void)(x)
|
||||||
|
|
||||||
|
/* Poison assert */
|
||||||
|
#pragma GCC poison assert
|
||||||
|
|
||||||
|
#define USE_RET __attribute__((warn_unused_result))
|
||||||
|
|
||||||
|
#define ARRAYLEN(x) (sizeof(x)/sizeof((x)[0]))
|
||||||
|
|
||||||
|
/* clang-format on */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* COMMON_H */
|
@ -1,5 +1,6 @@
|
|||||||
add_library(heat_common STATIC common/misc.c common/kernel.c)
|
add_library(heat_common STATIC common/misc.c common/kernel.c)
|
||||||
target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(heat_common PUBLIC m)
|
||||||
|
|
||||||
add_subdirectory(smp)
|
add_subdirectory(smp)
|
||||||
add_subdirectory(mpi)
|
add_subdirectory(mpi)
|
||||||
|
@ -3,11 +3,11 @@
|
|||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#define IGNORE_RESIDUAL ((double) -1.0)
|
#define IGNORE_RESIDUAL ((double) -1.0)
|
||||||
#define DEFAULT_DELTA ((double) 0.00005)
|
#define DEFAULT_DELTA ((double) 0.00005)
|
||||||
#define DEFAULT_BS 1024
|
#define DEFAULT_BS 1024
|
||||||
#define MAX_STRING_SIZE 100
|
|
||||||
|
|
||||||
#define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b))
|
#define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b))
|
||||||
|
|
||||||
@ -30,8 +30,8 @@ typedef struct {
|
|||||||
double *matrix;
|
double *matrix;
|
||||||
int numHeatSources;
|
int numHeatSources;
|
||||||
HeatSource *heatSources;
|
HeatSource *heatSources;
|
||||||
char confFileName[MAX_STRING_SIZE];
|
char confFileName[PATH_MAX];
|
||||||
char imageFileName[MAX_STRING_SIZE];
|
char imageFileName[PATH_MAX];
|
||||||
bool generateImage;
|
bool generateImage;
|
||||||
bool warmup;
|
bool warmup;
|
||||||
bool verbose;
|
bool verbose;
|
||||||
@ -47,6 +47,7 @@ void printConfiguration(const HeatConfiguration *conf);
|
|||||||
void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset);
|
void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset);
|
||||||
double getTime(void);
|
double getTime(void);
|
||||||
|
|
||||||
|
const char *summary(void);
|
||||||
double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData);
|
double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData);
|
||||||
void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
||||||
double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
|
||||||
|
52
src/heat/common/main.c
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
HeatConfiguration conf;
|
||||||
|
readConfiguration(argc, argv, &conf);
|
||||||
|
refineConfiguration(&conf, conf.rbs, conf.cbs);
|
||||||
|
if (conf.verbose)
|
||||||
|
printConfiguration(&conf);
|
||||||
|
|
||||||
|
int64_t rows = conf.rows+2;
|
||||||
|
int64_t cols = conf.cols+2;
|
||||||
|
|
||||||
|
initialize(&conf, rows, cols, 0);
|
||||||
|
|
||||||
|
if (conf.warmup)
|
||||||
|
solve(&conf, rows, cols, 1, NULL);
|
||||||
|
|
||||||
|
// Solve the problem
|
||||||
|
double start = getTime();
|
||||||
|
double residual = solve(&conf, rows, cols, conf.timesteps, NULL);
|
||||||
|
double end = getTime();
|
||||||
|
|
||||||
|
int64_t totalElements = conf.rows*conf.cols;
|
||||||
|
double throughput = (totalElements*conf.timesteps)/(end-start);
|
||||||
|
|
||||||
|
#ifdef _OMPSS_2
|
||||||
|
int threads = sysconf(_SC_NPROCESSORS_ONLN);
|
||||||
|
#else
|
||||||
|
int threads = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s",
|
||||||
|
"rows", "cols", "rbs", "cbs", "threads",
|
||||||
|
"steps", "error", "time", "updates/s\n");
|
||||||
|
fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n",
|
||||||
|
conf.rows, conf.cols,
|
||||||
|
conf.rbs, conf.cbs, threads,
|
||||||
|
conf.convergenceTimesteps, residual, end-start, throughput);
|
||||||
|
|
||||||
|
if (conf.generateImage)
|
||||||
|
writeImage(conf.imageFileName, conf.matrix, rows, cols);
|
||||||
|
|
||||||
|
finalize(&conf);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -1,11 +1,11 @@
|
|||||||
#define _POSIX_C_SOURCE 200809L
|
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
#include <libgen.h>
|
||||||
|
#include <limits.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@ -105,7 +105,11 @@ static void printUsage(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
(void) argc;
|
(void) argc;
|
||||||
|
|
||||||
fprintf(stdout, "Usage: %s [OPTION]...\n", argv[0]);
|
const char *prog = basename(argv[0]);
|
||||||
|
fprintf(stdout, "%s - %s\n", prog, summary());
|
||||||
|
fprintf(stdout, "\n");
|
||||||
|
fprintf(stdout, "Usage: %s [OPTION]...\n", prog);
|
||||||
|
fprintf(stdout, "\n");
|
||||||
fprintf(stdout, "Parameters:\n");
|
fprintf(stdout, "Parameters:\n");
|
||||||
fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n");
|
fprintf(stdout, " -s, --size=SIZE use SIZExSIZE matrix as the surface\n");
|
||||||
fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n");
|
fprintf(stdout, " -r, --rows=ROWS use ROWS as the number of rows of the surface\n");
|
||||||
@ -176,7 +180,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
|||||||
conf->verbose = true;
|
conf->verbose = true;
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
if (strlen(optarg) >= MAX_STRING_SIZE) {
|
if (strlen(optarg) >= PATH_MAX) {
|
||||||
fprintf(stderr, "Error: Configuration name is too long!\n");
|
fprintf(stderr, "Error: Configuration name is too long!\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -185,7 +189,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
|||||||
case 'o':
|
case 'o':
|
||||||
conf->generateImage = true;
|
conf->generateImage = true;
|
||||||
conf->warmup = false;
|
conf->warmup = false;
|
||||||
if (strlen(optarg) >= MAX_STRING_SIZE) {
|
if (strlen(optarg) >= PATH_MAX) {
|
||||||
fprintf(stderr, "Error: Image name is too long!\n");
|
fprintf(stderr, "Error: Image name is too long!\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -263,8 +267,8 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf)
|
|||||||
|
|
||||||
static void readSourcesFile(HeatConfiguration *conf, FILE *file)
|
static void readSourcesFile(HeatConfiguration *conf, FILE *file)
|
||||||
{
|
{
|
||||||
char line[MAX_STRING_SIZE];
|
char line[4096];
|
||||||
if (!fgets(line, MAX_STRING_SIZE, file)) {
|
if (!fgets(line, 4096, file)) {
|
||||||
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -283,7 +287,7 @@ static void readSourcesFile(HeatConfiguration *conf, FILE *file)
|
|||||||
assert(conf->heatSources != NULL);
|
assert(conf->heatSources != NULL);
|
||||||
|
|
||||||
for (int i = 0; i < conf->numHeatSources; i++) {
|
for (int i = 0; i < conf->numHeatSources; i++) {
|
||||||
if (!fgets(line, MAX_STRING_SIZE, file)) {
|
if (!fgets(line, 4096, file)) {
|
||||||
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
fprintf(stderr, "Error: Configuration file is not correct!\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
@ -2,34 +2,36 @@ if(NOT MPI_FOUND)
|
|||||||
return()
|
return()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(heat_mpi_common STATIC main.c utils.c)
|
|
||||||
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
|
|
||||||
|
|
||||||
macro(mk_heat_mpi NAME SOURCE)
|
macro(mk_heat_mpi NAME SOURCE)
|
||||||
add_executable(${NAME} ${SOURCE})
|
mk_bench(${NAME})
|
||||||
|
target_sources(${NAME} PRIVATE ${SOURCE})
|
||||||
target_link_libraries(${NAME} PRIVATE heat_mpi_common)
|
target_link_libraries(${NAME} PRIVATE heat_mpi_common)
|
||||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
mk_heat_mpi(heat_mpi solver_mpi.c)
|
|
||||||
mk_heat_mpi(heat_mpi_nbuffer solver_mpi_nbuffer.c)
|
|
||||||
|
|
||||||
if(NANOS6_FOUND)
|
|
||||||
macro(mk_heat_mpi_nanos6 NAME SOURCE)
|
macro(mk_heat_mpi_nanos6 NAME SOURCE)
|
||||||
mk_heat_mpi(${NAME} ${SOURCE})
|
mk_heat_mpi(${NAME} ${SOURCE})
|
||||||
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
|
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
mk_heat_mpi_nanos6(heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
|
|
||||||
mk_heat_mpi_nanos6(heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NODES_FOUND)
|
|
||||||
macro(mk_heat_mpi_nodes NAME SOURCE)
|
macro(mk_heat_mpi_nodes NAME SOURCE)
|
||||||
mk_heat_mpi(${NAME} ${SOURCE})
|
mk_heat_mpi(${NAME} ${SOURCE})
|
||||||
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
|
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
mk_heat_mpi_nodes(heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c)
|
# -------------------------------------------------------------------
|
||||||
mk_heat_mpi_nodes(heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
|
|
||||||
|
add_library(heat_mpi_common STATIC main.c utils.c)
|
||||||
|
target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C)
|
||||||
|
|
||||||
|
mk_heat_mpi(b6_heat_mpi solver_mpi.c)
|
||||||
|
mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c)
|
||||||
|
|
||||||
|
if(NANOS6_FOUND)
|
||||||
|
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||||
|
mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NODES_FOUND)
|
||||||
|
mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c)
|
||||||
|
mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c)
|
||||||
endif()
|
endif()
|
||||||
|
@ -3,6 +3,11 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short description of this solver variant; printUsage() prints
 * it next to the program name. The string has static storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using MPI and blocking primitives";

	return description;
}
|
||||||
|
|
||||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||||
{
|
{
|
||||||
|
@ -8,6 +8,13 @@ typedef struct {
|
|||||||
MPI_Request recv;
|
MPI_Request recv;
|
||||||
} HaloRequests;
|
} HaloRequests;
|
||||||
|
|
||||||
|
/* Returns the short (two-line) description of this solver variant;
 * printUsage() prints it next to the program name. The string has static
 * storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using MPI and non-blocking primitives with\n"
		"overlap of computation and communication phases";

	return description;
}
|
||||||
|
|
||||||
static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs)
|
static inline void isend(const double *data, int nelems, int dst, int tag, HaloRequests *reqs)
|
||||||
{
|
{
|
||||||
MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send);
|
MPI_Isend(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD, &reqs->send);
|
||||||
|
@ -3,6 +3,12 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short (two-line) description of this solver variant;
 * printUsage() prints it next to the program name. The string has static
 * storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using MPI + OmpSs-2 following a fork-join\n"
		"parallelization.";

	return description;
}
|
||||||
|
|
||||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||||
{
|
{
|
||||||
|
@ -3,9 +3,15 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
|
||||||
static int serial;
|
static int serial;
|
||||||
|
|
||||||
|
/* Returns the short (two-line) description of this solver variant;
 * printUsage() prints it next to the program name. The string has static
 * storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using MPI + OmpSs-2 tasks where communication\n"
		"tasks are serialized";

	return description;
}
|
||||||
|
|
||||||
static inline void send(const double *data, int nelems, int dst, int tag)
|
static inline void send(const double *data, int nelems, int dst, int tag)
|
||||||
{
|
{
|
||||||
MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD);
|
MPI_Send(data, nelems, MPI_DOUBLE, dst, tag, MPI_COMM_WORLD);
|
||||||
|
@ -1,25 +1,32 @@
|
|||||||
add_library(heat_seq_common STATIC main.c)
|
macro(mk_heat_smp NAME SOURCE)
|
||||||
target_link_libraries(heat_seq_common PUBLIC heat_common)
|
mk_bench(${NAME})
|
||||||
|
target_sources(${NAME} PRIVATE ${SOURCE})
|
||||||
|
target_link_libraries(${NAME} PRIVATE heat_smp_common)
|
||||||
|
endmacro()
|
||||||
|
|
||||||
add_executable(heat_seq solver_seq.c)
|
macro(mk_heat_nanos6 NAME SOURCE)
|
||||||
target_link_libraries(heat_seq PUBLIC heat_seq_common)
|
mk_heat_smp(${NAME} ${SOURCE})
|
||||||
|
target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
macro(mk_heat_nodes NAME SOURCE)
|
||||||
|
mk_heat_smp(${NAME} ${SOURCE})
|
||||||
|
target_link_libraries(${NAME} PRIVATE Nodes::wrapper)
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------
|
||||||
|
|
||||||
|
add_library(heat_smp_common STATIC main.c)
|
||||||
|
target_link_libraries(heat_smp_common PUBLIC heat_common)
|
||||||
|
|
||||||
|
mk_heat_smp(b6_heat_seq solver_seq.c)
|
||||||
|
|
||||||
if(NANOS6_FOUND)
|
if(NANOS6_FOUND)
|
||||||
macro(mk_heat_nanos6 NAME SOURCE)
|
mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c)
|
||||||
add_executable(${NAME} ${SOURCE})
|
mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c)
|
||||||
target_link_libraries(${NAME} PRIVATE heat_seq_common Nanos6::wrapper)
|
|
||||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
|
||||||
endmacro()
|
|
||||||
mk_heat_nanos6(heat_nanos6 solver_ompss2.c)
|
|
||||||
mk_heat_nanos6(heat_nanos6_residual solver_ompss2_residual.c)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NODES_FOUND)
|
if(NODES_FOUND)
|
||||||
macro(mk_heat_nodes NAME SOURCE)
|
mk_heat_nodes(b6_heat_nodes solver_ompss2.c)
|
||||||
add_executable(${NAME} ${SOURCE})
|
mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c)
|
||||||
target_link_libraries(${NAME} PRIVATE heat_seq_common Nodes::wrapper)
|
|
||||||
install(TARGETS ${NAME} RUNTIME DESTINATION bin)
|
|
||||||
endmacro()
|
|
||||||
mk_heat_nodes(heat_nodes solver_ompss2.c)
|
|
||||||
mk_heat_nodes(heat_nodes_residual solver_ompss2_residual.c)
|
|
||||||
endif()
|
endif()
|
||||||
|
@ -35,13 +35,16 @@ int main(int argc, char **argv)
|
|||||||
int threads = 1;
|
int threads = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s",
|
fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n",
|
||||||
"rows", "cols", "rbs", "cbs", "threads",
|
"time", "updates/s", "error",
|
||||||
"steps", "error", "time", "updates/s\n");
|
"rows", "cols",
|
||||||
fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n",
|
"rbs", "cbs", "threads",
|
||||||
|
"steps");
|
||||||
|
fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n",
|
||||||
|
end-start, throughput, residual,
|
||||||
conf.rows, conf.cols,
|
conf.rows, conf.cols,
|
||||||
conf.rbs, conf.cbs, threads,
|
conf.rbs, conf.cbs, threads,
|
||||||
conf.convergenceTimesteps, residual, end-start, throughput);
|
conf.convergenceTimesteps);
|
||||||
|
|
||||||
if (conf.generateImage)
|
if (conf.generateImage)
|
||||||
writeImage(conf.imageFileName, conf.matrix, rows, cols);
|
writeImage(conf.imageFileName, conf.matrix, rows, cols);
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short description of this solver variant; printUsage() prints
 * it next to the program name. The string has static storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using OmpSs-2 tasks";

	return description;
}
|
||||||
|
|
||||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
||||||
{
|
{
|
||||||
|
@ -2,6 +2,12 @@
|
|||||||
|
|
||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short (two-line) description of this solver variant;
 * printUsage() prints it next to the program name. The string has static
 * storage. */
const char *
summary(void)
{
	static const char description[] =
		"Parallel version using OmpSs-2 tasks and taking into account the\n"
		"residual";

	return description;
}
|
||||||
|
|
||||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual)
|
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual)
|
||||||
{
|
{
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short description of this solver variant; printUsage() prints
 * it next to the program name. The OmpSs-2 construct is spelled "taskloop"
 * (the previous text read "tasklook"). */
const char *
summary(void)
{
	return "Parallel version using OmpSs-2 taskloop";
}
|
||||||
|
|
||||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb])
|
||||||
{
|
{
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
#include "common/heat.h"
|
#include "common/heat.h"
|
||||||
|
|
||||||
|
/* Returns the short description of this solver variant; printUsage() prints
 * it next to the program name. The string has static storage. */
const char *
summary(void)
{
	static const char description[] =
		"Sequential solver with one CPU";

	return description;
}
|
||||||
|
|
||||||
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols])
|
static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, double M[rows][cols])
|
||||||
{
|
{
|
||||||
|
12
src/ompss2/CMakeLists.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# mk_nanos6(NAME SOURCE): registers the benchmark NAME through mk_bench(),
# builds it from SOURCE and links it against the Nanos6 wrapper.
# NOTE(review): the former mk_bench6 macro also linked the bench6 helper
# library; confirm whether these targets need bench6_lib as well.
macro(mk_nanos6 NAME SOURCE)
  mk_bench(${NAME})
  target_sources(${NAME} PRIVATE ${SOURCE})
  target_link_libraries(${NAME} PRIVATE Nanos6::wrapper)
endmacro()

if(NANOS6_FOUND)
  # Use the macro defined above: mk_bench6 no longer exists after the
  # reorganization, so calling it would abort the configure step.
  mk_nanos6(b6_nanos6_register_deps register_deps.c)
  mk_nanos6(b6_nanos6_sched_add sched_add.c)
  mk_nanos6(b6_nanos6_sched_get sched_get.c)
  mk_nanos6(b6_nanos6_readywave readywave.c)
endif()
|
@ -1,6 +1,4 @@
|
|||||||
## Bench6: A set of micro-benchmarks for Nanos6
|
This directory contains a set of microbenchmarks for Nanos6,
|
||||||
|
|
||||||
This repository contains a set of microbenchmarks for Nanos6,
|
|
||||||
specifically target to expose the limitations of the runtime following
|
specifically targeted at exposing the limitations of the runtime following
|
||||||
the breakdown analysis.
|
the breakdown analysis.
|
||||||
|
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 118 KiB After Width: | Height: | Size: 118 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB |
7
src/tools/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Export the benchmark names collected by mk_bench() so that configure_file()
# can substitute @BENCH6_LIST@ in config.in.h.
get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list)
configure_file(config.in.h config.h)

add_executable(bench6_runner runner.c)
# config.h is generated into the binary directory; scope the include path to
# the only target that needs it instead of the whole directory.
target_include_directories(bench6_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(bench6_runner PRIVATE m bench6_lib)
install(TARGETS bench6_runner RUNTIME DESTINATION bin)
|
11
src/tools/config.in.h
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||||
|
|
||||||
|
#ifndef CONFIG_H
|
||||||
|
#define CONFIG_H
|
||||||
|
|
||||||
|
#define BENCH6_PREFIX "@CMAKE_INSTALL_PREFIX@"
|
||||||
|
#define BENCH6_BIN (BENCH6_PREFIX "/bin")
|
||||||
|
#define BENCH6_LIST "@BENCH6_LIST@"
|
||||||
|
|
||||||
|
#endif /* CONFIG_H */
|
199
src/tools/runner.c
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#include "common.h"
|
||||||
|
#include "config.h"
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
//static void
|
||||||
|
//usage(void)
|
||||||
|
//{
|
||||||
|
// exit(1);
|
||||||
|
//}
|
||||||
|
|
||||||
|
/* State of one sampling session: the collected run times plus the limits
 * and statistics that decide when to stop running the benchmark. */
struct sampling {
	int nmax;        /* capacity of samples[]; add_sample() dies beyond it */
	int nmin;        /* minimum number of samples before stopping is allowed */
	int n;           /* number of samples stored so far */
	double *samples; /* array of nmax run times, filled by add_sample() */
	double rse;      /* 1.96 * standard error / mean, updated by stats() */
	double last;     /* most recently added sample */
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
do_run(char *argv[], double *ptime)
|
||||||
|
{
|
||||||
|
/* Gather binary path */
|
||||||
|
char path[PATH_MAX];
|
||||||
|
sprintf(path, "%s/%s", BENCH6_BIN, argv[0]);
|
||||||
|
|
||||||
|
if (access(path, R_OK | X_OK) != 0) {
|
||||||
|
err("cannot find benchmark %s:", path);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int pipefd[2];
|
||||||
|
if (pipe(pipefd) != 0) {
|
||||||
|
err("pipe failed:");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fork */
|
||||||
|
pid_t p = fork();
|
||||||
|
|
||||||
|
if (p < 0) {
|
||||||
|
err("fork failed:");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In children execute benchmark */
|
||||||
|
if (p == 0) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
dup2(pipefd[1], 1);
|
||||||
|
close(2);
|
||||||
|
if (execve(path, argv, NULL) != 0) {
|
||||||
|
err("execve failed:");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
/* Not reached */
|
||||||
|
} else {
|
||||||
|
close(pipefd[1]);
|
||||||
|
char line[4096];
|
||||||
|
FILE *f = fdopen(pipefd[0], "r");
|
||||||
|
if (f == NULL) {
|
||||||
|
err("fdopen failed:");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fgets(line, 4096, f) == NULL) {
|
||||||
|
err("missing stdout line");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *nl = strchr(line, '\n');
|
||||||
|
if (nl != NULL)
|
||||||
|
*nl = '\0';
|
||||||
|
|
||||||
|
double time;
|
||||||
|
sscanf(line, "%le", &time);
|
||||||
|
//printf("got %e\n", time);
|
||||||
|
*ptime = time;
|
||||||
|
|
||||||
|
/* Drain the rest of the stdout */
|
||||||
|
while (fgets(line, 4096, f) != NULL) { }
|
||||||
|
fclose(f);
|
||||||
|
close(pipefd[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
stats(struct sampling *s)
|
||||||
|
{
|
||||||
|
if (s->n < 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
double n = s->n;
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int i = 0; i < s->n; i++)
|
||||||
|
sum += s->samples[i];
|
||||||
|
|
||||||
|
double mean = sum / n;
|
||||||
|
double sumsqr = 0.0;
|
||||||
|
for (int i = 0; i < s->n; i++) {
|
||||||
|
double dev = s->samples[i] - mean;
|
||||||
|
sumsqr += dev * dev;
|
||||||
|
}
|
||||||
|
|
||||||
|
double var = sumsqr / n;
|
||||||
|
double stdev = sqrt(var);
|
||||||
|
double se = stdev / sqrt(n);
|
||||||
|
double rse = se * 1.96 / mean;
|
||||||
|
|
||||||
|
fprintf(stderr, "\rn=%d last=%e mean=%e stdev=%e se=%e rse=%e",
|
||||||
|
s->n, s->last, mean, stdev, se, rse);
|
||||||
|
|
||||||
|
s->rse = rse;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
should_continue(struct sampling *s)
|
||||||
|
{
|
||||||
|
stats(s);
|
||||||
|
|
||||||
|
if (s->n < s->nmin)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (s->rse * 100.0 > 1.0 /* % */)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_sample(struct sampling *s, double time)
|
||||||
|
{
|
||||||
|
if (s->n >= s->nmax) {
|
||||||
|
die("overflowing samples");
|
||||||
|
} else {
|
||||||
|
s->samples[s->n] = time;
|
||||||
|
s->n++;
|
||||||
|
s->last = time;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//static int
|
||||||
|
//compare_double(const void *a, const void *b)
|
||||||
|
//{
|
||||||
|
// double aa = *(const double *) a;
|
||||||
|
// double bb = *(const double *) b;
|
||||||
|
//
|
||||||
|
// if (aa < bb)
|
||||||
|
// return -1;
|
||||||
|
// else if (aa > bb)
|
||||||
|
// return +1;
|
||||||
|
// else
|
||||||
|
// return 0;
|
||||||
|
//}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sample(char *argv[])
|
||||||
|
{
|
||||||
|
struct sampling s = { 0 };
|
||||||
|
s.nmax = 4000;
|
||||||
|
s.nmin = 30;
|
||||||
|
s.samples = calloc(s.nmax, sizeof(double));
|
||||||
|
s.n = 0;
|
||||||
|
|
||||||
|
while (should_continue(&s)) {
|
||||||
|
double time;
|
||||||
|
if (do_run(argv, &time) != 0) {
|
||||||
|
err("failed to run benchmark");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
add_sample(&s, time);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(s.samples);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Entry point of the runner: argv[1] is the benchmark to sample and the
 * remaining arguments are passed to it unchanged. Returns 0 on success and
 * 1 on a usage error or when sampling fails.
 */
int
main(int argc, char *argv[])
{
	/* Without a benchmark name argv + 1 points at the NULL terminator */
	if (argc < 2) {
		fprintf(stderr, "usage: %s benchmark [args...]\n",
				argc > 0 ? argv[0] : "bench6_runner");
		return 1;
	}

	if (sample(argv + 1) != 0) {
		err("failed to sample the benchmark");
		return 1;
	}

	return 0;
}
|