Replace the CMake build with a Makefile that builds one executable per
benchmark, runs each one into data/<name>.csv and plots the CSV with the
matching R script in plot/.

Reviewer notes:
- The header names of the #include lines were missing from the reviewed copy
  of this patch; context lines are kept as found. The include added to
  src/common.c is presumed to be <nanos6/debug.h>, which declares
  nanos6_get_num_cpus() -- confirm against the tree.
- Indentation and blank context lines were reconstructed from the hunk line
  counts; refresh the patch against the tree before applying.
- get_ncpus() is declared and defined with (void), matching get_time(void).
- The Makefile gains .DELETE_ON_ERROR and .PHONY, and runs the benchmark via
  $<. The "index" post-image hashes of Makefile, src/bench6.h and
  src/common.c predate these changes.

diff --git a/.gitignore b/.gitignore
index 378eac2..62d7226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-build
+data
+bench6.*
diff --git a/CMakeLists.txt b/CMakeLists.txt
deleted file mode 100644
index 7a4c67b..0000000
--- a/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2022 Barcelona Supercomputing Center (BSC)
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-cmake_minimum_required(VERSION 3.10)
-
-project(BENCH6 LANGUAGES C)
-
-add_compile_options(-Wall -Wextra -Wformat
-  -Wmissing-prototypes -Wstrict-prototypes
-  #-Wconversion -Wsign-conversion
-  -Wold-style-definition -pedantic
-  -Werror
-)
-
-set(CMAKE_C_COMPILER "clang")
-
-set(CMAKE_C_STANDARD 11)
-set(CMAKE_C_EXTENSIONS FALSE)
-
-add_executable(bench6
-  src/main.c
-  src/common.c
-  src/sched_get.c
-  src/sched_add.c
-  src/register_deps.c)
-
-target_compile_options(bench6 PRIVATE -fompss-2)
-target_link_options(bench6 PRIVATE -fompss-2)
-
-install(TARGETS bench6 RUNTIME DESTINATION bin)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1f0a786
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,31 @@
+CC=clang
+CFLAGS=-O3 -fompss-2
+
+BENCHMARKS=\
+	sched_get \
+	sched_add \
+	register_deps
+
+BIN=$(addprefix bench6.,$(BENCHMARKS))
+DATA=$(addsuffix .csv, $(addprefix data/,$(BENCHMARKS)))
+PLOT=$(DATA:=.png)
+
+# Delete a target whose recipe failed, so a crashed benchmark or plot
+# script does not leave a truncated file that looks up to date.
+.DELETE_ON_ERROR:
+
+.PHONY: all
+all: $(BIN) $(DATA) $(PLOT)
+
+# Build each benchmark as its own executable, linked with the common code.
+bench6.%: src/%.c src/common.c
+	$(CC) $(CFLAGS) -o $@ $^
+
+# Plot a CSV with the R script named after the benchmark.
+data/%.csv.png: data/%.csv plot/%.R
+	Rscript plot/$(*F).R $<
+
+# Run a benchmark and capture its CSV output.
+data/%.csv: bench6.%
+	mkdir -p data
+	./$< > $@
diff --git a/plot/register_deps.R b/plot/register_deps.R
new file mode 100644
index 0000000..b327a09
--- /dev/null
+++ b/plot/register_deps.R
@@ -0,0 +1,34 @@
+library(ggplot2)
+library(dplyr, warn.conflicts = FALSE)
+library(scales)
+library(jsonlite)
+library(readr)
+
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
+
+input_file = args[1]
+
+df = read_delim(input_file, delim=",", comment="#") %>%
+  mutate(ndeps = as.factor(ndeps)) %>%
+  mutate(time_per_task = time_per_task * 1e6) %>%
+  group_by(ndeps) %>%
+  mutate(median_time = median(time_per_task)) %>%
+  ungroup()
+
+dpi = 300
+h = 6
+w = 15
+
+# ---------------------------------------------------------------------
+
+p = ggplot(df, aes(x=ndeps, y=time_per_task)) +
+  geom_boxplot() +
+  theme_bw() +
+  labs(
+    x="Number of dependencies per task",
+    y="Creation and registration time per task (us)",
+    title="bench6.register_deps: registration time vs number of dependencies")
+
+ggsave(sprintf("%s.png", input_file), plot=p, width=w, height=h, dpi=dpi)
+
diff --git a/plot/sched_add.R b/plot/sched_add.R
new file mode 100644
index 0000000..9e00228
--- /dev/null
+++ b/plot/sched_add.R
@@ -0,0 +1,39 @@
+library(ggplot2)
+library(dplyr, warn.conflicts = FALSE)
+library(scales)
+library(jsonlite)
+library(readr)
+
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
+
+input_file = args[1]
+
+df = read_delim(input_file, delim=",", comment="#") %>%
+  mutate(run = as.factor(run)) %>%
+  mutate(time_per_task_per_cpu = time_per_task_per_cpu * 1e9)
+
+dpi = 150
+h = 2
+w = 6
+
+# ---------------------------------------------------------------------
+
+#p = ggplot(df, aes(x=run, y=time_per_task)) +
+#  geom_point() +
+#  theme_bw() +
+#  labs(
+#    x = "Number of run",
+#    y="get_ready_task() time (ns)",
+#    title="Nanos6: get ready task time")
+
+p = ggplot(df, aes(x=time_per_task_per_cpu)) +
+  geom_boxplot() +
+  theme_bw() +
+  scale_x_continuous(breaks = breaks_pretty(10)) +
+  labs(
+    x="Duration per task per CPU (ns / task * CPU)",
+    title="bench6.sched_add: time to unblock N tasks")
+
+ggsave(sprintf("%s.png", input_file), plot=p, width=w, height=h, dpi=dpi)
+
diff --git a/plot/sched_get.R b/plot/sched_get.R
new file mode 100644
index 0000000..0d4f94d
--- /dev/null
+++ b/plot/sched_get.R
@@ -0,0 +1,39 @@
+library(ggplot2)
+library(dplyr, warn.conflicts = FALSE)
+library(scales)
+library(jsonlite)
+library(readr)
+
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
+
+input_file = args[1]
+
+df = read_delim(input_file, delim=",", comment="#") %>%
+  mutate(run = as.factor(run)) %>%
+  mutate(time_per_task_per_cpu = time_per_task_per_cpu * 1e9)
+
+dpi = 150
+h = 2
+w = 6
+
+# ---------------------------------------------------------------------
+
+#p = ggplot(df, aes(x=run, y=time_per_task_per_cpu)) +
+#  geom_point() +
+#  theme_bw() +
+#  labs(
+#    x = "Number of run",
+#    y="get_ready_task() time (ns)",
+#    title="Nanos6: get ready task time")
+
+p = ggplot(df, aes(x=time_per_task_per_cpu)) +
+  geom_boxplot() +
+  theme_bw() +
+  scale_x_continuous(breaks = breaks_pretty(10)) +
+  labs(
+    x="Average duration per task per CPU (ns / task * CPU)",
+    title="bench6.sched_get: average time to schedule N ready tasks")
+
+ggsave(sprintf("%s.png", input_file), plot=p, width=w, height=h, dpi=dpi)
+
diff --git a/src/bench6.h b/src/bench6.h
index 10d6f1d..d182559 100644
--- a/src/bench6.h
+++ b/src/bench6.h
@@ -7,6 +7,7 @@
 #define UNUSED(x) (void)(x)
 
 double get_time(void);
+int get_ncpus(void);
 
 int bench6_creator(int argc, char *argv[]);
 int bench6_sched_get(int argc, char *argv[]);
diff --git a/src/common.c b/src/common.c
index fd8f276..c4e7ac9 100644
--- a/src/common.c
+++ b/src/common.c
@@ -5,6 +5,7 @@
 
 #include "bench6.h"
 
+#include <nanos6/debug.h>
 #include
 #include
 #include
@@ -22,3 +23,8 @@ double get_time()
 
 	return (double)(tv.tv_sec) + (double)tv.tv_nsec * 1.0e-9;
 }
+
+int get_ncpus(void)
+{
+	return (int) nanos6_get_num_cpus();
+}
diff --git a/src/register_deps.c b/src/register_deps.c
index 01edd96..df36c09 100644
--- a/src/register_deps.c
+++ b/src/register_deps.c
@@ -18,7 +18,8 @@ static long ntasks = 100;
 static int
 usage(char *argv[])
 {
-	fprintf(stderr, "Usage: bench6 %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
+	fprintf(stderr, "Bench6: A set of Nanos6 micro-benchmarks\n");
+	fprintf(stderr, "Usage: %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
 	fprintf(stderr, "\n");
 	fprintf(stderr,
 "Measure the time it takes to create and register NTASKS tasks\n"
@@ -49,7 +50,7 @@ do_run(int run)
 }
 
 int
-bench6_register_deps(int argc, char *argv[])
+main(int argc, char *argv[])
 {
 	int opt;
 
diff --git a/src/sched_add.c b/src/sched_add.c
index 6e57942..608977e 100644
--- a/src/sched_add.c
+++ b/src/sched_add.c
@@ -13,6 +13,7 @@
 #include
 #include
 
+static int ncpus = -1;
 static long nruns = 30L;
 static long ntasks = 10000L;
 
@@ -22,7 +23,8 @@ static void **handle;
 static int
 usage(char *argv[])
 {
-	fprintf(stderr, "Usage: bench6 %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
+	fprintf(stderr, "Bench6: A set of Nanos6 micro-benchmarks\n");
+	fprintf(stderr, "Usage: %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
 	fprintf(stderr, "\n");
 	fprintf(stderr,
 "Measure the time it takes to unblock NTASKS which will end\n"
@@ -66,11 +68,11 @@ do_run(int run)
 	double t1 = get_time();
 	printf("%d,%ld,%e,%e\n",
 			run, ntasks, (t1 - t0),
-			(t1 - t0) / ((double) ntasks));
+			(t1 - t0) / ((double) ntasks) * ((double) ncpus));
 }
 
 int
-bench6_sched_add(int argc, char *argv[])
+main(int argc, char *argv[])
 {
 	int opt;
 	while ((opt = getopt(argc, argv, "hr:t:")) != -1) {
@@ -87,6 +89,8 @@ bench6_sched_add(int argc, char *argv[])
 		}
 	}
 
+	ncpus = get_ncpus();
+
 	handle = calloc(ntasks, sizeof(void *));
 
 	if (handle == NULL) {
@@ -94,7 +98,7 @@ bench6_sched_add(int argc, char *argv[])
 		return -1;
 	}
 
-	printf("%s,%s,%s,%s\n", "run", "ntasks", "time_us", "time_per_task");
+	printf("%s,%s,%s,%s\n", "run", "ntasks", "time", "time_per_task_per_cpu");
 	for (int run = 0; run < nruns; run++)
 		do_run(run);
 
diff --git a/src/sched_get.c b/src/sched_get.c
index 2842abb..2852d6e 100644
--- a/src/sched_get.c
+++ b/src/sched_get.c
@@ -10,6 +10,7 @@
 #include
 #include
 
+static int ncpus = -1;
 static long nruns = 30L;
 static long ntasks = 20000L;
 
@@ -18,7 +19,8 @@ static atomic_int wait = 0;
 static int
 usage(char *argv[])
 {
-	fprintf(stderr, "Usage: bench6 %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
+	fprintf(stderr, "Bench6: A set of Nanos6 micro-benchmarks\n");
+	fprintf(stderr, "Usage: %s [-r NRUNS] [-t NTASKS]\n", argv[0]);
 	fprintf(stderr, "\n");
 	fprintf(stderr,
 "Creates NTASKS tasks without dependencies, but the tasks don't\n"
@@ -48,13 +50,13 @@ do_run(int run)
 	#pragma oss taskwait
 
 	double t1 = get_time();
-	printf("%d,%ld,%e,%e\n",
-			run, ntasks, (t1 - t0),
-			(t1 - t0) / ((double) ntasks));
+	printf("%d,%ld,%d,%e,%e\n",
+			run, ntasks, ncpus, (t1 - t0),
+			(t1 - t0) / ((double) ntasks) * ((double) ncpus));
 }
 
 int
-bench6_sched_get(int argc, char *argv[])
+main(int argc, char *argv[])
 {
 	int opt;
 
@@ -72,7 +74,9 @@ bench6_sched_get(int argc, char *argv[])
 		}
 	}
 
-	printf("%s,%s,%s,%s\n", "run", "ntasks", "time", "time_per_task");
+	ncpus = get_ncpus();
+
+	printf("%s,%s,%s,%s,%s\n", "run", "ntasks", "ncpus", "time", "time_per_task_per_cpu");
 	for (int run = 0; run < nruns; run++)
 		do_run(run);
 