Archived
1
0
forked from rarias/bscpkgs

WIP: postprocessing pipeline

Now each run is executed in an independent folder
This commit is contained in:
2020-10-21 18:18:43 +02:00
parent 1321b6a888
commit 4beb069627
20 changed files with 232 additions and 279 deletions

58
garlic/pp/fetch.nix Normal file
View File

@@ -0,0 +1,58 @@
{
  stdenv
, rsync
, openssh
, nix
, curl
, garlicTools
}:

# Generates an executable shell script that copies the output of one
# experiment from a remote host with rsync and then calls nix-build on
# garlic.pp.getExpResult for that experiment.
{
  sshHost
, prefix
, experimentStage
, trebuchetStage
, garlicTemp
# We only fetch the config, stdout and stderr by default
, fetchAll ? false
}:

with garlicTools;

let
  # The experiment is identified by the base name of its stage path.
  expName = baseNameOf (toString experimentStage);

  # rsync rules that keep only the unit configuration and the std*.log files.
  # The rules are joined with " \<newline>" so that they continue the rsync
  # command line they are spliced into.
  selectiveFilter = builtins.concatStringsSep " \\\n" [
    "--include='*/*/garlic_config.json'"
    "--include='*/*/std*.log'"
    "--include='*/*/*/std*.log'"
    "--exclude='*/*/*/*' "
  ];

  # No filter at all when everything has to be fetched.
  filterArgs = if fetchAll then "" else selectiveFilter;
in
  stdenv.mkDerivation {
    name = "fetch";
    preferLocalBuild = true;
    buildInputs = [ rsync openssh curl ];
    phases = [ "installPhase" ];

    # The output is the script itself. "\$" defers the expansion to the
    # moment the generated script runs instead of build time.
    installPhase = ''
      cat > $out << EOF
      #!/bin/sh -e
      mkdir -p ${garlicTemp}
      export PATH=${rsync}/bin:${openssh}/bin:${nix}/bin
      rsync -av \
      --copy-links \
      ${filterArgs} \
      '${sshHost}:${prefix}/${expName}' ${garlicTemp}
      res=\$(nix-build -E '(with import ./default.nix; garlic.pp.getExpResult { \
      experimentStage = "${experimentStage}"; \
      trebuchetStage = "${trebuchetStage}"; \
      garlicTemp = "${garlicTemp}"; \
      })')
      echo "The results for experiment ${expName} are at:"
      echo " \$res"
      EOF
      chmod +x $out
    '';
  }

View File

@@ -0,0 +1,25 @@
{
  stdenv
, ministat
}:

# Installs the hist.sh script found next to this file as $out/bin/hist.
stdenv.mkDerivation {
  name = "hist";
  src = ./.;
  preferLocalBuild = true;

  # Exposed as an attribute of the derivation so that substituteAllInPlace
  # can fill in the @ministat@ placeholder of hist.sh.
  inherit ministat;

  # The script needs neither a configure nor a build step.
  dontConfigure = true;
  dontBuild = true;

  patchPhase = ''
    substituteAllInPlace hist.sh
  '';

  installPhase = ''
    mkdir -p $out/bin
    cp hist.sh $out/bin/hist
    chmod +x $out/bin/hist
  '';
}

79
garlic/pp/hist/hist.sh Executable file
View File

@@ -0,0 +1,79 @@
#!/bin/bash
# Use it either by reading from stdin or by specifying
# multiple files as arguments
# xeon07$ hist stdout.log
# x <stdin>
# +------------------------------------------------------------------------+
# | x |
# | x |
# | x |
# | x |
# | x |
# | xxx |
# | xxx |
# | xxxxx |
# | xxxxxx |
# | xxxxxxx x|
# ||________M_A___________| |
# +------------------------------------------------------------------------+
# N Min Max Median Avg Stddev
# x 30 3.585183 3.763913 3.591559 3.5973344 0.031719975
#
# Other ministat options can be passed as well. The -S option splits the results
# in multiple plots.
# Print the command line synopsis on stderr and terminate with status 1.
usage() {
  echo "Usage: hist [-hSAns] [-c confidence] [-w width] files..." >&2
  exit 1
}
# Run ministat over the "time" samples of every file listed in the global
# array "files".
#
# For each file, the second field of the lines starting with "time " is
# written to a temporary file, which is what ministat actually reads. The
# temporary names that ministat prints are then mapped back to the original
# file names with sed.
#
# Globals read:
#   files         array with the input file names
#   split         when 1, ministat is run once per file instead of once for all
#   ministat_opt  extra ministat options (word-split on purpose)
function stat_files() {
  local tmpfiles=() sedcmd=""
  local file tmp escaped f

  # Quote the array expansion so that names with whitespace stay in one piece
  for file in "${files[@]}"; do
    tmp=$(mktemp)
    awk '/^time /{print $2}' "$file" > "$tmp"
    # Protect the characters that are special in the replacement part of the
    # sed command built below: the ":" delimiter, "&" and the backslash
    escaped=$(printf '%s' "$file" | sed -e 's/[\\:&]/\\&/g')
    sedcmd+="s:$tmp:$escaped:g;"
    tmpfiles+=("$tmp")
  done

  if [ "$split" == 1 ]; then
    for f in "${tmpfiles[@]}"; do
      ministat $ministat_opt "$f" | sed -e "$sedcmd"
    done
  else
    ministat $ministat_opt "${tmpfiles[@]}" | sed -e "$sedcmd"
  fi

  rm -f "${tmpfiles[@]}"
}
# Defaults: a single combined plot, 72 columns wide
split=0
ministat_opt="-w72"

# -S is handled here; every other valid option is forwarded to ministat.
# Anything else (including -h) prints the usage and exits.
while getopts "hSAnsc:w:" o; do
  case "${o}" in
    S) split=1 ;;
    c) ministat_opt+=" -c $OPTARG" ;;
    w) ministat_opt+=" -w $OPTARG" ;;
    A|n|s) ministat_opt+=" -$o" ;;
    *) usage ;;
  esac
done
shift $((OPTIND-1))

# The placeholder below is filled in when the package is built
ministat=@ministat@/bin
#ministat=/nix/store/sh9b484bnhkajxnblpwix7fhbkid6365-ministat-20150715-1/bin
export PATH="$PATH:$ministat"

files=("$@")

# Count the arguments instead of testing the concatenated array for
# emptiness, so that an empty string argument is not mistaken for "no files"
if [ "${#files[@]}" -eq 0 ]; then
  # No files given: take the samples from stdin
  awk '/^time /{print $2}' | ministat $ministat_opt
else
  stat_files
fi

16
garlic/pp/merge.nix Normal file
View File

@@ -0,0 +1,16 @@
{
  stdenv
}:

# Concatenates the given files, in order, into a single output file.
# "experiments" is a list whose elements are interpolated into a shell
# command line, separated by spaces.
experiments:

with stdenv.lib;

let
  inputFiles = concatStringsSep " " experiments;
in
  stdenv.mkDerivation {
    name = "merge.json";
    preferLocalBuild = true;
    phases = [ "installPhase" ];
    installPhase = ''
      cat ${inputFiles} >> $out
    '';
  }

43
garlic/pp/result.nix Normal file
View File

@@ -0,0 +1,43 @@
{
  stdenv
, garlicTools
, fetchExperiment
}:

# Copies the already fetched output of an experiment from garlicTemp into the
# store. The build fails, explaining how to run and fetch the experiment, when
# the output is not present in garlicTemp.
{
  trebuchetStage
, experimentStage
, garlicTemp
}:

with garlicTools;

let
  experimentName = baseNameOf (toString experimentStage);

  # Script suggested to the user when the results are missing. It must fetch
  # into the same garlicTemp that is checked below, so the argument is
  # forwarded instead of using a fixed directory.
  fetcher = fetchExperiment {
    sshHost = "mn1";
    prefix = "/gpfs/projects/\\\$(id -gn)/\\\$(id -un)/garlic-out";
    inherit experimentStage trebuchetStage garlicTemp;
  };
in
  stdenv.mkDerivation {
    name = "result";
    preferLocalBuild = true;
    # The build reads garlicTemp, which lives outside of the store
    __noChroot = true;
    phases = [ "installPhase" ];
    installPhase = ''
      expPath="${garlicTemp}/${experimentName}"
      if [ ! -e "$expPath" ]; then
        echo "The experiment ${experimentName} is missing in ${garlicTemp}."
        echo "Please fetch it and try again."
        echo "You can execute ${trebuchetStage} to run the experiment."
        echo "And then ${fetcher} to get the results."
        exit 1
      fi
      mkdir -p $out
      cp -a "$expPath" $out
    '';
  }

33
garlic/pp/rplot.nix Normal file
View File

@@ -0,0 +1,33 @@
{
  stdenv
, rWrapper
, rPackages
}:

{
# Passed to the script as its only argument
  dataset
# R script executed with Rscript
, script
# Additional R packages made available besides tidyverse
, extraRPackages ? []
}:

with stdenv.lib;

let
  # R wrapper providing tidyverse plus the packages requested by the caller
  rEnv = rWrapper.override {
    packages = with rPackages; [ tidyverse ] ++ extraRPackages;
  };
in
  stdenv.mkDerivation {
    name = "plot";
    preferLocalBuild = true;
    buildInputs = [ rEnv ];
    dontPatchShebangs = true;
    phases = [ "installPhase" ];

    # The script runs with $out as its working directory
    installPhase = ''
      mkdir -p $out
      cd $out
      Rscript --vanilla ${script} ${dataset}
    '';
  }

30
garlic/pp/timeResult.nix Normal file
View File

@@ -0,0 +1,30 @@
{
  stdenv
}:

# For every */*-unit directory under the *-experiment directories of
# inputResult, the output mirrors the directory, with a copy of its
# garlic_config.json and a data.csv holding one "run time" line per "time "
# line found in the stdout.log of each numbered run.
inputResult:

stdenv.mkDerivation {
  name = "timeResult";
  preferLocalBuild = true;
  phases = [ "installPhase" ];
  installPhase = ''
    mkdir -p $out
    cd ${inputResult}
    for unit in *-experiment/*-unit; do
      dst=$out/$unit
      csv=$dst/data.csv
      mkdir -p $dst
      # Keep the configuration of the unit next to its data
      cp "$unit/garlic_config.json" "$dst/garlic_config.json"
      # Header first, then the samples of every run in numeric order
      echo "run time" > $csv
      for r in $(cd $unit; ls -d [0-9]* | sort -n); do
        awk "/^time /{print \"$r\", \$2}" $unit/$r/stdout.log >> $csv
      done
    done
  '';
}

31
garlic/pp/timetable.nix Normal file
View File

@@ -0,0 +1,31 @@
{
  stdenv
, jq
}:

# Builds a file with one compact JSON object per line, of the form
#   { exp, unit, config, time }
# where "config" is the content of the garlic_config.json of the unit and
# "time" is a value taken from a line starting with "time " in the stdout.log
# of a numbered run directory. One object is written per time line.
inputResult:

stdenv.mkDerivation {
  name = "timetable.json";
  preferLocalBuild = true;
  phases = [ "installPhase" ];
  buildInputs = [ jq ];
  installPhase = ''
    touch $out
    cd ${inputResult}
    for exp in *-experiment; do
      cd ${inputResult}/$exp
      for unit in *-unit; do
        cd ${inputResult}/$exp/$unit
        conf=garlic_config.json
        for run in $(ls -d [0-9]* | sort -n); do
          log=$run/stdout.log
          times=$(awk '/^time /{print $2}' "$log")
          # Stop with a clear message rather than handing jq an empty value
          if [ -z "$times" ]; then
            echo "timetable: no 'time' line found in $exp/$unit/$log" >&2
            exit 1
          fi
          # The values are given to jq as arguments and not pasted into the
          # program text, so names and times cannot break the jq syntax
          for t in $times; do
            jq -cn --arg exp "$exp" --arg unit "$unit" --argjson time "$t" \
              '{ exp:$exp, unit:$unit, config:inputs, time:$time }' \
              "$conf" >> $out
          done
        done
      done
    done
    #gzip $out
  '';
}