WIP: postprocessing pipeline

Now each run is executed in an independent folder
This commit is contained in:
Rodrigo Arias 2020-10-21 18:18:43 +02:00
parent 1321b6a888
commit 4beb069627
20 changed files with 232 additions and 279 deletions

View File

@ -5,7 +5,9 @@
, targetMachine
, stages
, enableJemalloc ? false
, enableFreeCpu ? false
# Leave the first CPU per socket unused?
, freeCpu ? false
}:
with stdenv.lib;
@ -37,6 +39,7 @@ let
mpi = impi;
gitBranch = "garlic/tampi+send+oss+task";
cflags = "-g";
inherit enableJemalloc;
# Repeat the execution of each unit 10 times
loops = 10;
@ -46,9 +49,15 @@ let
ntasksPerNode = hw.socketsPerNode;
nodes = 1;
time = "02:00:00";
cpuBind = if (enableFreeCpu)
then "verbose,mask_cpu:0x7fffff,0x7fffff000000"
else "sockets,verbose";
# If we want to leave one CPU per socket unused
inherit freeCpu;
cpuBind = if (freeCpu)
then "verbose,mask_cpu:0xfffffe,0xfffffe000000"
else "verbose,sockets";
jobName = "bs-${toString blocksize}-${gitBranch}-nbody";
};

View File

@ -1,21 +1,30 @@
library(ggplot2)
library(dplyr)
library(scales)
library(jsonlite)
# Load the dataset
#df=read.table("/nix/store/zcyazjbcjn2lhxrpa3bs5y7rw3bbcgnr-plot/data.csv",
df=read.table("data.csv",
col.names=c("variant", "blocksize", "time"))
args=commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1]
input_file = "timetable.json.gz"
if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten()
# We only need the cpu bind, blocksize and time
df = select(dataset, config.freeCpu, config.blocksize, time) %>%
rename(blocksize=config.blocksize, freeCpu=config.freeCpu)
# Use the blocksize as factor
df$blocksize = as.factor(df$blocksize)
df$freeCpu = as.factor(df$freeCpu)
# Split by freeCpu setting
D=df %>% group_by(variant, blocksize) %>%
D=df %>% group_by(freeCpu, blocksize) %>%
mutate(tnorm = time / median(time) - 1)
bs_unique = unique(df$blocksize)
nbs=length(bs_unique)
@ -49,7 +58,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
linetype="dashed", color="red") +
# Draw boxplots
geom_boxplot(aes(fill=variant)) +
geom_boxplot(aes(fill=freeCpu)) +
# # Use log2 scale in x
# scale_x_continuous(trans=log2_trans(),
@ -64,7 +73,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
theme(legend.position = c(0.85, 0.85)) #+
# Place each variant group in one separate plot
#facet_wrap(~variant)
#facet_wrap(~freeCpu)
@ -77,7 +86,7 @@ dev.off()
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
#
## Create the plot with the time vs blocksize
p = ggplot(D, aes(x=blocksize, y=time, color=variant)) +
p = ggplot(D, aes(x=blocksize, y=time, color=freeCpu)) +
labs(x="Block size", y="Time (s)",
title="Nbody granularity",

View File

@ -1,67 +0,0 @@
# Figure comparing the default nbody result against the freeCpu one.
#
# For every unit found in both results it extracts the block size from
# garlic_config.json and the "time" lines from stdout.log, appends them to
# data.csv tagged with the variant name, and then runs plot.R.
{
  stdenv
, gnuplot
, jq
, garlicTools
, resultFromTrebuchet
, writeText
, rWrapper
, rPackages

# The two results to be compared
, resDefault
, resFreeCpu
}:

with garlicTools;
with stdenv.lib;

let
  # R wrapper bundling the tidyverse packages
  customR = rWrapper.override {
    packages = with rPackages; [ tidyverse ];
  };

  plotScript = ./plot.R;

in stdenv.mkDerivation {
  name = "plot";
  buildInputs = [ jq gnuplot customR ];
  preferLocalBuild = true;
  dontPatchShebangs = true;

  src = ./.;

  buildPhase = ''
    echo default = ${resDefault}
    echo freeCpu = ${resFreeCpu}

    substituteAllInPlace plot.R
    # Keep -i and -e as separate options: GNU sed parses "-ie" as "-i" with
    # the backup suffix "e", which leaves a stray plot.Re file behind.
    sed -i -e "s:@expResult@:$out:g" plot.R

    # Rows tagged "default"
    for unit in ${resDefault}/*/*; do
      log="$unit/stdout.log"
      conf="$unit/garlic_config.json"
      bs=$(jq .blocksize $conf)
      awk "/^time /{print \"default\", $bs, \$2}" $log >> data.csv
    done

    # Rows tagged "freeCpu"
    for unit in ${resFreeCpu}/*/*; do
      log="$unit/stdout.log"
      conf="$unit/garlic_config.json"
      bs=$(jq .blocksize $conf)
      awk "/^time /{print \"freeCpu\", $bs, \$2}" $log >> data.csv
    done

    Rscript plot.R
  '';

  installPhase = ''
    mkdir $out
    # Keep links to the inputs next to the generated figures
    ln -s ${resFreeCpu} $out/resFreeCpu
    ln -s ${resDefault} $out/resDefault
    cp *.png $out/
    cp *.csv $out/
  '';
}

View File

@ -1,20 +1,31 @@
library(ggplot2)
library(dplyr)
library(scales)
library(jsonlite)
# Load the dataset
df=read.table("/nix/store/vvfcimwp8mkv6kc5fs3rbyjy8grgpmmb-plot/data.csv",
col.names=c("variant", "blocksize", "time"))
args=commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1]
input_file = "timetable.json.gz"
if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten()
# We only need the jemalloc flag, blocksize and time
df = select(dataset, config.enableJemalloc, config.blocksize, time) %>%
rename(blocksize=config.blocksize,
jemalloc=config.enableJemalloc)
# Use the blocksize as factor
df$blocksize = as.factor(df$blocksize)
df$jemalloc = as.factor(df$jemalloc)
# Split by malloc variant
D=df %>% group_by(variant, blocksize) %>%
D=df %>% group_by(jemalloc, blocksize) %>%
mutate(tnorm = time / median(time) - 1)
bs_unique = unique(df$blocksize)
nbs=length(bs_unique)
@ -35,7 +46,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
# Labels
labs(x="Block size", y="Normalized time",
title="Nbody normalized time",
subtitle="@expResult@") +
subtitle=input_file) +
# Center the title
#theme(plot.title = element_text(hjust = 0.5)) +
@ -48,7 +59,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
linetype="dashed", color="red") +
# Draw boxplots
geom_boxplot(aes(fill=variant)) +
# Fill by the jemalloc column: this data frame has no freeCpu column
geom_boxplot(aes(fill=jemalloc)) +
# # Use log2 scale in x
# scale_x_continuous(trans=log2_trans(),
@ -58,10 +69,11 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
theme_bw() +
theme(plot.subtitle=element_text(size=10)) +
theme(legend.position = c(0.85, 0.85)) #+
# Place each variant group in one separate plot
#facet_wrap(~variant)
@ -71,22 +83,22 @@ print(p)
## Save the png image
dev.off()
#
#png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
#
## Create the plot with the time vs blocksize
#p = ggplot(D, aes(x=blocksize, y=time, color=variant)) +
#
# labs(x="Block size", y="Time (s)",
# title="Nbody granularity",
# subtitle="@expResult@") +
# theme_bw() +
#
# geom_point(shape=21, size=3) +
# scale_x_continuous(trans=log2_trans()) +
# scale_y_continuous(trans=log2_trans())
#
## Render the plot
#print(p)
#
## Save the png image
#dev.off()
# Scatter plot of the measured time per block size, colored by the jemalloc
# setting. The data frame built above only has the `jemalloc` column (renamed
# from config.enableJemalloc), so that is the column mapped to the color.
p = ggplot(D, aes(x=blocksize, y=time, color=jemalloc)) +
  labs(x="Block size", y="Time (s)",
    title="Nbody granularity",
    subtitle=input_file) +
  theme_bw() +
  geom_point(shape=21, size=3) +
  #scale_x_continuous(trans=log2_trans()) +
  scale_y_continuous(trans=log2_trans())
# Render the plot
print(p)
# Save the png image
dev.off()

View File

@ -1,68 +0,0 @@
# Dataset for the figure comparing the default nbody result against the
# jemalloc one.
#
# For every unit found in both results it extracts the block size from
# garlic_config.json and the "time" lines from stdout.log, and appends them
# to data.csv tagged with the variant name. Running plot.R is disabled here.
{
  stdenv
, gnuplot
, jq
, garlicTools
, resultFromTrebuchet
, writeText
, rWrapper
, rPackages

# The two results to be compared
, resDefault
, resJemalloc
}:

with garlicTools;
with stdenv.lib;

let
  # R wrapper bundling the tidyverse packages
  customR = rWrapper.override {
    packages = with rPackages; [ tidyverse ];
  };

  plotScript = ./plot.R;

in stdenv.mkDerivation {
  name = "plot";
  buildInputs = [ jq gnuplot customR ];
  preferLocalBuild = true;
  dontPatchShebangs = true;

  inherit resDefault resJemalloc;

  src = ./.;

  buildPhase = ''
    # Report the path of each input under its own label
    echo default = ${resDefault}
    echo jemalloc = ${resJemalloc}

    substituteAllInPlace plot.R

    # Rows tagged "default"
    for unit in ${resDefault}/*/*; do
      log="$unit/stdout.log"
      conf="$unit/garlic_config.json"
      bs=$(jq .blocksize $conf)
      awk "/^time /{print \"default\", $bs, \$2}" $log >> data.csv
    done

    # Rows tagged "jemalloc"
    for unit in ${resJemalloc}/*/*; do
      log="$unit/stdout.log"
      conf="$unit/garlic_config.json"
      bs=$(jq .blocksize $conf)
      awk "/^time /{print \"jemalloc\", $bs, \$2}" $log >> data.csv
    done

    #Rscript plot.R
  '';

  installPhase = ''
    mkdir $out
    # Keep links to the inputs next to the generated dataset
    ln -s ${resJemalloc} $out/resJemalloc
    ln -s ${resDefault} $out/resDefault
    #cp *.png $out/
    cp *.csv $out/
  '';
}

View File

@ -1,62 +0,0 @@
# Figure for the first experiment of the given list: gathers the block size
# and the "time" lines of every unit into data.csv and runs plot.R on it.
{
  stdenv
, gnuplot
, jq
, experiments
, garlicTools
, getExpResult
, writeText
, rWrapper
, rPackages
}:

with garlicTools;
with stdenv.lib;

let
  # Only the first element of the list is plotted
  firstExperiment = builtins.elemAt experiments 0;

  expResult = getExpResult {
    garlicTemp = "/tmp/garlic-temp";
    trebuchetStage = firstExperiment;
    experimentStage = getExperimentStage firstExperiment;
  };

  # R wrapper bundling the tidyverse packages
  rEnv = rWrapper.override {
    packages = [ rPackages.tidyverse ];
  };

in stdenv.mkDerivation {
  name = "plot";
  buildInputs = [ jq gnuplot rEnv ];
  preferLocalBuild = true;
  dontPatchShebangs = true;

  # Passed to the builder as a derivation attribute
  inherit expResult;

  src = ./.;

  buildPhase = ''
    echo "using results ${expResult}"

    substituteAllInPlace plot.R

    # One data.csv row per "time" line: <blocksize> <time>
    for unitDir in ${expResult}/*/*; do
      name=$(basename $unitDir)
      blocksize=$(jq .blocksize $unitDir/garlic_config.json)
      awk "/^time /{print $blocksize, \$2}" $unitDir/stdout.log >> data.csv
    done

    Rscript plot.R
  '';

  installPhase = ''
    mkdir $out
    ln -s ${expResult} $out/result
    cp *.png $out/
    cp data.csv $out/
  '';
}

View File

@ -1,14 +0,0 @@
# Environment providing R with the tidyverse packages.
{ pkgs ? import ../../default.nix }:

let
  # R wrapper extended with tidyverse
  rEnv = pkgs.rWrapper.override {
    packages = [ pkgs.rPackages.tidyverse ];
  };
in
pkgs.stdenv.mkDerivation {
  name = "R";
  buildInputs = [ rEnv ];
}

View File

@ -13,12 +13,19 @@
, experimentStage
, trebuchetStage
, garlicTemp
# We only fetch the config, stdout and stderr by default
, fetchAll ? false
}:
with garlicTools;
let
experimentName = baseNameOf (toString experimentStage);
rsyncFilter = if (fetchAll) then "" else ''
--include='*/*/garlic_config.json' \
--include='*/*/std*.log' \
--include='*/*/*/std*.log' \
--exclude='*/*/*/*' '';
in
stdenv.mkDerivation {
name = "fetch";
@ -34,10 +41,10 @@ in
export PATH=${rsync}/bin:${openssh}/bin:${nix}/bin
rsync -av \
--copy-links \
--include='*/*/*.log' --include='*/*/*.json' --exclude='*/*/*' \
${rsyncFilter} \
'${sshHost}:${prefix}/${experimentName}' ${garlicTemp}
res=\$(nix-build -E '(with import ./default.nix; garlic.getExpResult { \
res=\$(nix-build -E '(with import ./default.nix; garlic.pp.getExpResult { \
experimentStage = "${experimentStage}"; \
trebuchetStage = "${trebuchetStage}"; \
garlicTemp = "${garlicTemp}"; \

16
garlic/pp/merge.nix Normal file
View File

@ -0,0 +1,16 @@
# Concatenates the given dataset files, in order, into one output file.
{
  stdenv
}:

# List of files to concatenate
experiments:

let
  # Space-separated list of the input paths, as handed to cat
  inputFiles = stdenv.lib.concatStringsSep " " experiments;
in
stdenv.mkDerivation {
  name = "merge.json";
  preferLocalBuild = true;
  phases = [ "installPhase" ];
  installPhase = ''
    cat ${inputFiles} >> $out
  '';
}

33
garlic/pp/rplot.nix Normal file
View File

@ -0,0 +1,33 @@
# Runs an R script on a dataset, using $out as the working directory so that
# the files written by the script end up in the output.
{
  stdenv
, rWrapper
, rPackages
}:

{
# File passed to the script as its only argument
  dataset
# R script to execute
, script
# R packages made available in addition to tidyverse
, extraRPackages ? []
}:

let
  rEnv = rWrapper.override {
    packages = [ rPackages.tidyverse ] ++ extraRPackages;
  };

in stdenv.mkDerivation {
  name = "plot";
  buildInputs = [ rEnv ];
  preferLocalBuild = true;
  dontPatchShebangs = true;
  phases = [ "installPhase" ];

  installPhase = ''
    mkdir -p $out
    cd $out
    Rscript --vanilla ${script} ${dataset}
  '';
}

30
garlic/pp/timeResult.nix Normal file
View File

@ -0,0 +1,30 @@
# Mirrors the <experiment>/<unit> layout of a result into $out, keeping for
# each unit its garlic_config.json and a data.csv with the "time" values
# found in the stdout.log of every numbered run directory.
{
  stdenv
}:

# Result tree to process
inputResult:

stdenv.mkDerivation {
  name = "timeResult";
  preferLocalBuild = true;
  phases = [ "installPhase" ];

  installPhase = ''
    mkdir -p $out
    cd ${inputResult}

    for unitDir in *-experiment/*-unit; do
      dest=$out/$unitDir
      mkdir -p $dest

      # Keep the configuration of the unit next to its data
      cp "$unitDir/garlic_config.json" "$dest/garlic_config.json"

      # One table per unit: header first, then one row per "time" line
      table=$dest/data.csv
      echo "run time" > $table

      # Run directories are visited in numeric order
      for runId in $(cd $unitDir; ls -d [0-9]* | sort -n); do
        awk "/^time /{print \"$runId\", \$2}" $unitDir/$runId/stdout.log >> $table
      done
    done
  '';
}

31
garlic/pp/timetable.nix Normal file
View File

@ -0,0 +1,31 @@
# Builds a file with one JSON object per line (NDJSON), one per run:
#
#   { "exp": <experiment dir>, "unit": <unit dir>,
#     "config": <contents of the unit garlic_config.json>, "time": <number> }
#
# The time is taken from the "time" line of the run stdout.log.
{
  stdenv
, jq
}:

# Result tree with the layout <x>-experiment/<y>-unit/<run number>/stdout.log
inputResult:

stdenv.mkDerivation {
  name = "timetable.json";
  preferLocalBuild = true;
  phases = [ "installPhase" ];
  buildInputs = [ jq ];

  installPhase = ''
    # The output exists even when there are no runs
    touch $out

    cd ${inputResult}
    for exp in *-experiment; do
      cd ${inputResult}/$exp
      for unit in *-unit; do
        cd ${inputResult}/$exp/$unit
        conf=garlic_config.json

        # Run directories are visited in numeric order
        for run in $(ls -d [0-9]* | sort -n); do
          time=$(awk '/^time /{print $2}' $run/stdout.log)

          # Fail with a readable message rather than a jq syntax error
          if [ -z "$time" ]; then
            echo "timetable: no time found in $exp/$unit/$run/stdout.log" >&2
            exit 1
          fi

          # Values are handed over as jq variables, never pasted into the
          # filter text, so odd characters in the names cannot break it
          jq -cn --arg exp "$exp" --arg unit "$unit" --argjson time "$time" \
            '{ exp: $exp, unit: $unit, config: inputs, time: $time }' \
            $conf >> $out
        done
      done
    done
    #gzip $out
  '';
}

View File

@ -19,7 +19,10 @@ stdenv.mkDerivation {
cat > $out <<EOF
#!/bin/sh
for n in \$(seq 1 ${toString loops}); do
mkdir "\$n"
cd "\$n"
${stageProgram nextStage}
cd ..
done
EOF
chmod +x $out

View File

@ -14,6 +14,7 @@
, ntasks ? null
, ntasksPerNode ? null
, ntasksPerSocket ? null
, cpusPerTask ? null
, nodes ? null
, exclusive ? true # By default we run in exclusive mode
, qos ? null
@ -60,6 +61,7 @@ stdenv.mkDerivation rec {
+ sbatchOpt "ntasks" ntasks
+ sbatchOpt "ntasks-per-node" ntasksPerNode
+ sbatchOpt "ntasks-per-socket" ntasksPerSocket
+ sbatchOpt "cpus-per-task" cpusPerTask
+ sbatchOpt "nodes" nodes
+ sbatchOpt "chdir" chdir
+ sbatchOpt "output" output

View File

@ -8,6 +8,8 @@
, cpuBind
, nixPrefix
, srunOptions ? ""
, output ? "stdout.log"
, error ? "stderr.log"
}:
with garlicTools;
@ -23,6 +25,8 @@ stdenv.mkDerivation rec {
exec ${slurm}/bin/srun \
--mpi=pmi2 \
--cpu-bind=${cpuBind} \
--output=${output} \
--error=${error} \
${srunOptions} \
${nixPrefix}${stageProgram nextStage}
EOF

View File

@ -279,11 +279,12 @@ let
# Experiments
exp = {
nbody = {
nbody = rec {
test = callPackage ./garlic/exp/nbody/test.nix { };
tampi = callPackage ./garlic/exp/nbody/tampi.nix { };
baseline = tampi;
freeCpu = callPackage ./garlic/exp/nbody/tampi.nix {
enableFreeCpu = true;
freeCpu = true;
};
jemalloc = callPackage ./garlic/exp/nbody/tampi.nix {
enableJemalloc = true;
@ -310,37 +311,44 @@ let
};
};
hist = callPackage ./garlic/pp/hist { };
# Post processing tools
hist = callPackage ./garlic/postprocess/hist { };
getExpResult = callPackage ./garlic/postprocess/result.nix { };
resultFromTrebuchet = trebuchetStage: self.garlic.getExpResult {
garlicTemp = "/tmp/garlic-temp";
inherit trebuchetStage;
experimentStage = with self.bsc.garlicTools;
getExperimentStage trebuchetStage;
pp = rec {
getExpResult = callPackage ./garlic/pp/result.nix {
inherit fetchExperiment;
};
resultFromTrebuchet = trebuchetStage: getExpResult {
garlicTemp = "/tmp/garlic-temp";
inherit trebuchetStage;
experimentStage = with self.bsc.garlicTools;
getExperimentStage trebuchetStage;
};
fetchExperiment = callPackage ./garlic/pp/fetch.nix { };
timetable = callPackage ./garlic/pp/timetable.nix { };
rPlot = callPackage ./garlic/pp/rplot.nix { };
timetableFromTrebuchet = tre: timetable (resultFromTrebuchet tre);
mergeDatasets = callPackage ./garlic/pp/merge.nix { };
# Takes a list of experiments and returns a file that contains all the
# timetable results from the experiments.
merge = exps: mergeDatasets (map timetableFromTrebuchet exps);
};
fetchExperiment = callPackage ./garlic/postprocess/fetch.nix { };
# Figures generated from the experiments
fig = {
fig = with self.bsc.garlic; {
nbody = {
test = callPackage ./garlic/fig/nbody/test/default.nix {
experiments = [
self.bsc.garlic.exp.nbody.tampi
];
jemalloc = pp.rPlot {
script = ./garlic/fig/nbody/jemalloc.R;
dataset = with exp.nbody; pp.merge [ baseline jemalloc ];
};
jemalloc = callPackage ./garlic/fig/nbody/jemalloc/default.nix {
resDefault = self.garlic.resultFromTrebuchet
self.bsc.garlic.exp.nbody.tampi;
resJemalloc = self.garlic.resultFromTrebuchet
self.bsc.garlic.exp.nbody.jemalloc;
};
freeCpu = callPackage ./garlic/fig/nbody/freeCpu/default.nix {
resDefault = self.garlic.resultFromTrebuchet
self.bsc.garlic.exp.nbody.tampi;
resFreeCpu = self.garlic.resultFromTrebuchet
self.bsc.garlic.exp.nbody.freeCpu;
freeCpu = pp.rPlot {
script = ./garlic/fig/nbody/freeCpu.R;
dataset = with exp.nbody; pp.merge [ baseline freeCpu ];
};
};
};
};