From 4beb069627d09134899c156edc0a4cb46d8bd1c5 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Oct 2020 18:18:43 +0200 Subject: [PATCH] WIP: postprocessing pipeline Now each run is executed in an independent folder --- garlic/exp/nbody/tampi.nix | 17 +++-- .../fig/nbody/{freeCpu/plot.R => freeCpu.R} | 29 +++++--- garlic/fig/nbody/freeCpu/default.nix | 67 ------------------ .../fig/nbody/{jemalloc/plot.R => jemalloc.R} | 64 ++++++++++------- garlic/fig/nbody/jemalloc/default.nix | 68 ------------------- garlic/fig/nbody/{test/plot.R => test.R} | 0 garlic/fig/nbody/test/default.nix | 62 ----------------- garlic/fig/shell.nix | 14 ---- garlic/{postprocess => pp}/fetch.nix | 11 ++- garlic/{postprocess => pp}/hist/default.nix | 0 garlic/{postprocess => pp}/hist/hist.sh | 0 garlic/pp/merge.nix | 16 +++++ garlic/{postprocess => pp}/result.nix | 0 garlic/pp/rplot.nix | 33 +++++++++ garlic/pp/timeResult.nix | 30 ++++++++ garlic/pp/timetable.nix | 31 +++++++++ garlic/stages/control.nix | 3 + garlic/stages/sbatch.nix | 2 + garlic/stages/srun.nix | 4 ++ overlay.nix | 60 +++++++++------- 20 files changed, 232 insertions(+), 279 deletions(-) rename garlic/fig/nbody/{freeCpu/plot.R => freeCpu.R} (72%) delete mode 100644 garlic/fig/nbody/freeCpu/default.nix rename garlic/fig/nbody/{jemalloc/plot.R => jemalloc.R} (52%) delete mode 100644 garlic/fig/nbody/jemalloc/default.nix rename garlic/fig/nbody/{test/plot.R => test.R} (100%) delete mode 100644 garlic/fig/nbody/test/default.nix delete mode 100644 garlic/fig/shell.nix rename garlic/{postprocess => pp}/fetch.nix (71%) rename garlic/{postprocess => pp}/hist/default.nix (100%) rename garlic/{postprocess => pp}/hist/hist.sh (100%) create mode 100644 garlic/pp/merge.nix rename garlic/{postprocess => pp}/result.nix (100%) create mode 100644 garlic/pp/rplot.nix create mode 100644 garlic/pp/timeResult.nix create mode 100644 garlic/pp/timetable.nix diff --git a/garlic/exp/nbody/tampi.nix b/garlic/exp/nbody/tampi.nix index 
f049b03..a245110 100644 --- a/garlic/exp/nbody/tampi.nix +++ b/garlic/exp/nbody/tampi.nix @@ -5,7 +5,9 @@ , targetMachine , stages , enableJemalloc ? false -, enableFreeCpu ? false + +# Leave the first CPU per socket unused? +, freeCpu ? false }: with stdenv.lib; @@ -37,6 +39,7 @@ let mpi = impi; gitBranch = "garlic/tampi+send+oss+task"; cflags = "-g"; + inherit enableJemalloc; # Repeat the execution of each unit 30 times loops = 10; @@ -46,9 +49,15 @@ let ntasksPerNode = hw.socketsPerNode; nodes = 1; time = "02:00:00"; - cpuBind = if (enableFreeCpu) - then "verbose,mask_cpu:0x7fffff,0x7fffff000000" - else "sockets,verbose"; + + + # If we want to leave one CPU per socket unused + inherit freeCpu; + + cpuBind = if (freeCpu) + then "verbose,mask_cpu:0xfffffe,0xfffffe000000" + else "verbose,sockets"; + jobName = "bs-${toString blocksize}-${gitBranch}-nbody"; }; diff --git a/garlic/fig/nbody/freeCpu/plot.R b/garlic/fig/nbody/freeCpu.R similarity index 72% rename from garlic/fig/nbody/freeCpu/plot.R rename to garlic/fig/nbody/freeCpu.R index e87328b..1564353 100644 --- a/garlic/fig/nbody/freeCpu/plot.R +++ b/garlic/fig/nbody/freeCpu.R @@ -1,21 +1,30 @@ library(ggplot2) library(dplyr) library(scales) +library(jsonlite) -# Load the dataset -#df=read.table("/nix/store/zcyazjbcjn2lhxrpa3bs5y7rw3bbcgnr-plot/data.csv", -df=read.table("data.csv", - col.names=c("variant", "blocksize", "time")) +args=commandArgs(trailingOnly=TRUE) + +# Read the timetable from args[1] +input_file = "timetable.json.gz" +if (length(args)>0) { input_file = args[1] } + +# Load the dataset in NDJSON format +dataset = jsonlite::stream_in(file(input_file)) %>% + jsonlite::flatten() + +# We only need the cpu bind, blocksize and time +df = select(dataset, config.freeCpu, config.blocksize, time) %>% + rename(blocksize=config.blocksize, freeCpu=config.freeCpu) # Use the blocksize as factor df$blocksize = as.factor(df$blocksize) +df$freeCpu = as.factor(df$freeCpu) # Split by malloc variant - -D=df %>% 
group_by(variant, blocksize) %>% +D=df %>% group_by(freeCpu, blocksize) %>% mutate(tnorm = time / median(time) - 1) - bs_unique = unique(df$blocksize) nbs=length(bs_unique) @@ -49,7 +58,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + linetype="dashed", color="red") + # Draw boxplots - geom_boxplot(aes(fill=variant)) + + geom_boxplot(aes(fill=freeCpu)) + # # Use log2 scale in x # scale_x_continuous(trans=log2_trans(), @@ -64,7 +73,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + theme(legend.position = c(0.85, 0.85)) #+ # Place each variant group in one separate plot - #facet_wrap(~variant) + #facet_wrap(~freeCpu) @@ -77,7 +86,7 @@ dev.off() png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) # ## Create the plot with the normalized time vs blocksize -p = ggplot(D, aes(x=blocksize, y=time, color=variant)) + +p = ggplot(D, aes(x=blocksize, y=time, color=freeCpu)) + labs(x="Block size", y="Time (s)", title="Nbody granularity", diff --git a/garlic/fig/nbody/freeCpu/default.nix b/garlic/fig/nbody/freeCpu/default.nix deleted file mode 100644 index 4c4fa31..0000000 --- a/garlic/fig/nbody/freeCpu/default.nix +++ /dev/null @@ -1,67 +0,0 @@ -{ - stdenv -, gnuplot -, jq -, garlicTools -, resultFromTrebuchet -, writeText -, rWrapper -, rPackages - -# The two results to be compared -, resDefault -, resFreeCpu -}: - -with garlicTools; -with stdenv.lib; - -let - customR = rWrapper.override { - packages = with rPackages; [ tidyverse ]; - }; - - plotScript = ./plot.R; - -in stdenv.mkDerivation { - name = "plot"; - buildInputs = [ jq gnuplot customR ]; - preferLocalBuild = true; - dontPatchShebangs = true; - - src = ./.; - - buildPhase = '' - echo default = ${resDefault} - echo freeCpu = ${resFreeCpu} - - substituteAllInPlace plot.R - sed -ie "s:@expResult@:$out:g" plot.R - - for unit in ${resDefault}/*/*; do - name=$(basename $unit) - log="$unit/stdout.log" - conf="$unit/garlic_config.json" - bs=$(jq .blocksize $conf) - awk "/^time /{print \"default\", $bs, \$2}" $log 
>> data.csv - done - - for unit in ${resFreeCpu}/*/*; do - name=$(basename $unit) - log="$unit/stdout.log" - conf="$unit/garlic_config.json" - bs=$(jq .blocksize $conf) - awk "/^time /{print \"freeCpu\", $bs, \$2}" $log >> data.csv - done - - Rscript plot.R - ''; - - installPhase = '' - mkdir $out - ln -s ${resFreeCpu} $out/resFreeCpu - ln -s ${resDefault} $out/resDefault - cp *.png $out/ - cp *.csv $out/ - ''; -} diff --git a/garlic/fig/nbody/jemalloc/plot.R b/garlic/fig/nbody/jemalloc.R similarity index 52% rename from garlic/fig/nbody/jemalloc/plot.R rename to garlic/fig/nbody/jemalloc.R index 0bb5306..71b6951 100644 --- a/garlic/fig/nbody/jemalloc/plot.R +++ b/garlic/fig/nbody/jemalloc.R @@ -1,20 +1,31 @@ library(ggplot2) library(dplyr) library(scales) +library(jsonlite) -# Load the dataset -df=read.table("/nix/store/vvfcimwp8mkv6kc5fs3rbyjy8grgpmmb-plot/data.csv", - col.names=c("variant", "blocksize", "time")) +args=commandArgs(trailingOnly=TRUE) + +# Read the timetable from args[1] +input_file = "timetable.json.gz" +if (length(args)>0) { input_file = args[1] } + +# Load the dataset in NDJSON format +dataset = jsonlite::stream_in(file(input_file)) %>% + jsonlite::flatten() + +# We only need the jemalloc flag, blocksize and time +df = select(dataset, config.enableJemalloc, config.blocksize, time) %>% + rename(blocksize=config.blocksize, + jemalloc=config.enableJemalloc) # Use the blocksize as factor df$blocksize = as.factor(df$blocksize) +df$jemalloc = as.factor(df$jemalloc) # Split by malloc variant - -D=df %>% group_by(variant, blocksize) %>% +D=df %>% group_by(jemalloc, blocksize) %>% mutate(tnorm = time / median(time) - 1) - bs_unique = unique(df$blocksize) nbs=length(bs_unique) @@ -35,7 +46,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + # Labels labs(x="Block size", y="Normalized time", title="Nbody normalized time", - subtitle="@expResult@") + + subtitle=input_file) + # Center the title #theme(plot.title = element_text(hjust = 0.5)) + @@ -48,7 +59,7 @@ p 
= ggplot(data=D, aes(x=blocksize, y=tnorm)) + linetype="dashed", color="red") + # Draw boxplots - geom_boxplot(aes(fill=variant)) + + geom_boxplot(aes(fill=jemalloc)) + # # Use log2 scale in x # scale_x_continuous(trans=log2_trans(), @@ -58,10 +69,11 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + theme_bw() + + theme(plot.subtitle=element_text(size=10)) + + theme(legend.position = c(0.85, 0.85)) #+ # Place each variant group in one separate plot - #facet_wrap(~variant) @@ -71,22 +83,22 @@ print(p) ## Save the png image dev.off() # -#png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) +png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) # ## Create the plot with the normalized time vs blocksize -#p = ggplot(D, aes(x=blocksize, y=time, color=variant)) + -# -# labs(x="Block size", y="Time (s)", -# title="Nbody granularity", -# subtitle="@expResult@") + -# theme_bw() + -# -# geom_point(shape=21, size=3) + -# scale_x_continuous(trans=log2_trans()) + -# scale_y_continuous(trans=log2_trans()) -# -## Render the plot -#print(p) -# -## Save the png image -#dev.off() +p = ggplot(D, aes(x=blocksize, y=time, color=jemalloc)) + + + labs(x="Block size", y="Time (s)", + title="Nbody granularity", + subtitle=input_file) + + theme_bw() + + + geom_point(shape=21, size=3) + + #scale_x_continuous(trans=log2_trans()) + + scale_y_continuous(trans=log2_trans()) + +# Render the plot +print(p) + +# Save the png image +dev.off() diff --git a/garlic/fig/nbody/jemalloc/default.nix b/garlic/fig/nbody/jemalloc/default.nix deleted file mode 100644 index d2ac0cf..0000000 --- a/garlic/fig/nbody/jemalloc/default.nix +++ /dev/null @@ -1,68 +0,0 @@ -{ - stdenv -, gnuplot -, jq -, garlicTools -, resultFromTrebuchet -, writeText -, rWrapper -, rPackages - -# The two results to be compared -, resDefault -, resJemalloc -}: - -with garlicTools; -with stdenv.lib; - -let - customR = rWrapper.override { - packages = with rPackages; [ tidyverse ]; - }; - - plotScript = ./plot.R; - -in 
stdenv.mkDerivation { - name = "plot"; - buildInputs = [ jq gnuplot customR ]; - preferLocalBuild = true; - dontPatchShebangs = true; - - inherit resDefault resJemalloc; - - src = ./.; - - buildPhase = '' - echo default = ${resJemalloc} - echo jemalloc = ${resJemalloc} - - substituteAllInPlace plot.R - - for unit in ${resDefault}/*/*; do - name=$(basename $unit) - log="$unit/stdout.log" - conf="$unit/garlic_config.json" - bs=$(jq .blocksize $conf) - awk "/^time /{print \"default\", $bs, \$2}" $log >> data.csv - done - - for unit in ${resJemalloc}/*/*; do - name=$(basename $unit) - log="$unit/stdout.log" - conf="$unit/garlic_config.json" - bs=$(jq .blocksize $conf) - awk "/^time /{print \"jemalloc\", $bs, \$2}" $log >> data.csv - done - - #Rscript plot.R - ''; - - installPhase = '' - mkdir $out - ln -s ${resJemalloc} $out/resJemalloc - ln -s ${resDefault} $out/resDefault - #cp *.png $out/ - cp *.csv $out/ - ''; -} diff --git a/garlic/fig/nbody/test/plot.R b/garlic/fig/nbody/test.R similarity index 100% rename from garlic/fig/nbody/test/plot.R rename to garlic/fig/nbody/test.R diff --git a/garlic/fig/nbody/test/default.nix b/garlic/fig/nbody/test/default.nix deleted file mode 100644 index b23aba1..0000000 --- a/garlic/fig/nbody/test/default.nix +++ /dev/null @@ -1,62 +0,0 @@ -{ - stdenv -, gnuplot -, jq -, experiments -, garlicTools -, getExpResult -, writeText -, rWrapper -, rPackages -}: - -with garlicTools; -with stdenv.lib; - -let - experiment = builtins.elemAt experiments 0; - expResult = getExpResult { - garlicTemp = "/tmp/garlic-temp"; - trebuchetStage = experiment; - experimentStage = getExperimentStage experiment; - }; - - customR = rWrapper.override { - packages = with rPackages; [ tidyverse ]; - }; - - plotScript = ./plot.R; - -in stdenv.mkDerivation { - name = "plot"; - buildInputs = [ jq gnuplot customR ]; - preferLocalBuild = true; - dontPatchShebangs = true; - - inherit expResult; - - src = ./.; - - buildPhase = '' - echo "using results ${expResult}" - 
- substituteAllInPlace plot.R - - for unit in ${expResult}/*/*; do - name=$(basename $unit) - log="$unit/stdout.log" - conf="$unit/garlic_config.json" - bs=$(jq .blocksize $conf) - awk "/^time /{print $bs, \$2}" $log >> data.csv - done - - Rscript plot.R - ''; - - installPhase = '' - mkdir $out - ln -s ${expResult} $out/result - cp *.png $out/ - cp data.csv $out/ - ''; -} diff --git a/garlic/fig/shell.nix b/garlic/fig/shell.nix deleted file mode 100644 index f3b3988..0000000 --- a/garlic/fig/shell.nix +++ /dev/null @@ -1,14 +0,0 @@ -{ pkgs ? import ../../default.nix }: - -with pkgs; - -let - rWrapper = pkgs.rWrapper.override { - packages = with pkgs.rPackages; [ tidyverse ]; - }; -in -stdenv.mkDerivation { - name = "R"; - - buildInputs = [ rWrapper ]; -} diff --git a/garlic/postprocess/fetch.nix b/garlic/pp/fetch.nix similarity index 71% rename from garlic/postprocess/fetch.nix rename to garlic/pp/fetch.nix index 752021d..feeedda 100644 --- a/garlic/postprocess/fetch.nix +++ b/garlic/pp/fetch.nix @@ -13,12 +13,19 @@ , experimentStage , trebuchetStage , garlicTemp +# We only fetch the config, stdout and stderr by default +, fetchAll ? 
false }: with garlicTools; let experimentName = baseNameOf (toString experimentStage); + rsyncFilter = if (fetchAll) then "" else '' + --include='*/*/garlic_config.json' \ + --include='*/*/std*.log' \ + --include='*/*/*/std*.log' \ + --exclude='*/*/*/*' ''; in stdenv.mkDerivation { name = "fetch"; @@ -34,10 +41,10 @@ in export PATH=${rsync}/bin:${openssh}/bin:${nix}/bin rsync -av \ --copy-links \ - --include='*/*/*.log' --include='*/*/*.json' --exclude='*/*/*' \ + ${rsyncFilter} \ '${sshHost}:${prefix}/${experimentName}' ${garlicTemp} - res=\$(nix-build -E '(with import ./default.nix; garlic.getExpResult { \ + res=\$(nix-build -E '(with import ./default.nix; garlic.pp.getExpResult { \ experimentStage = "${experimentStage}"; \ trebuchetStage = "${trebuchetStage}"; \ garlicTemp = "${garlicTemp}"; \ diff --git a/garlic/postprocess/hist/default.nix b/garlic/pp/hist/default.nix similarity index 100% rename from garlic/postprocess/hist/default.nix rename to garlic/pp/hist/default.nix diff --git a/garlic/postprocess/hist/hist.sh b/garlic/pp/hist/hist.sh similarity index 100% rename from garlic/postprocess/hist/hist.sh rename to garlic/pp/hist/hist.sh diff --git a/garlic/pp/merge.nix b/garlic/pp/merge.nix new file mode 100644 index 0000000..bd83c46 --- /dev/null +++ b/garlic/pp/merge.nix @@ -0,0 +1,16 @@ +{ + stdenv +}: + +experiments: + +with stdenv.lib; + +stdenv.mkDerivation { + name = "merge.json"; + preferLocalBuild = true; + phases = [ "installPhase" ]; + installPhase = '' + cat ${concatStringsSep " " experiments} >> $out + ''; +} diff --git a/garlic/postprocess/result.nix b/garlic/pp/result.nix similarity index 100% rename from garlic/postprocess/result.nix rename to garlic/pp/result.nix diff --git a/garlic/pp/rplot.nix b/garlic/pp/rplot.nix new file mode 100644 index 0000000..aabc4b5 --- /dev/null +++ b/garlic/pp/rplot.nix @@ -0,0 +1,33 @@ +{ + stdenv +, rWrapper +, rPackages +}: + +{ +# The dataset and the R script that is run on it + dataset +, script +, extraRPackages ? 
[] +}: + +with stdenv.lib; + +let + customR = rWrapper.override { + packages = with rPackages; [ tidyverse ] ++ extraRPackages; + }; + +in stdenv.mkDerivation { + name = "plot"; + buildInputs = [ customR ]; + preferLocalBuild = true; + dontPatchShebangs = true; + phases = [ "installPhase" ]; + + installPhase = '' + mkdir -p $out + cd $out + Rscript --vanilla ${script} ${dataset} + ''; +} diff --git a/garlic/pp/timeResult.nix b/garlic/pp/timeResult.nix new file mode 100644 index 0000000..ec1b02a --- /dev/null +++ b/garlic/pp/timeResult.nix @@ -0,0 +1,30 @@ +{ + stdenv +}: + +inputResult: + +stdenv.mkDerivation { + name = "timeResult"; + preferLocalBuild = true; + phases = [ "installPhase" ]; + installPhase = '' + mkdir -p $out + cd ${inputResult} + for unit in *-experiment/*-unit; do + outunit=$out/$unit + mkdir -p $outunit + + # Copy the unit config + conf="$unit/garlic_config.json" + cp "$conf" "$outunit/garlic_config.json" + + # Merge all runs in one single CSV file + echo "run time" > $outunit/data.csv + for r in $(cd $unit; ls -d [0-9]* | sort -n); do + log="$unit/$r/stdout.log" + awk "/^time /{print \"$r\", \$2}" $log >> $outunit/data.csv + done + done + ''; +} diff --git a/garlic/pp/timetable.nix b/garlic/pp/timetable.nix new file mode 100644 index 0000000..8469249 --- /dev/null +++ b/garlic/pp/timetable.nix @@ -0,0 +1,31 @@ +{ + stdenv +, jq +}: + +inputResult: + +stdenv.mkDerivation { + name = "timetable.json"; + preferLocalBuild = true; + phases = [ "installPhase" ]; + buildInputs = [ jq ]; + installPhase = '' + touch $out + cd ${inputResult} + for exp in *-experiment; do + cd ${inputResult}/$exp + for unit in *-unit; do + cd ${inputResult}/$exp/$unit + conf=garlic_config.json + for run in $(ls -d [0-9]* | sort -n); do + time=$(awk '/^time /{print $2}' $run/stdout.log) + jq -cn "{ exp:\"$exp\", unit:\"$unit\", config:inputs, time:$time}" \ + $conf >> $out + done + done + done + + #gzip $out + ''; +} diff --git a/garlic/stages/control.nix 
b/garlic/stages/control.nix index f5166e3..51baed5 100644 --- a/garlic/stages/control.nix +++ b/garlic/stages/control.nix @@ -19,7 +19,10 @@ stdenv.mkDerivation { cat > $out <