From 3bd4e61f3fed657dece6792802eb847a7742f7fb Mon Sep 17 00:00:00 2001
From: Rodrigo Arias Mallo
Date: Mon, 26 Oct 2020 19:43:02 +0100
Subject: [PATCH] WIP: Testing with automatic fetching

---
Review notes (this section sits after the "---" separator, so it is not part
of the commit message):

 * The patch was re-flowed to one diff line per source line. Hunk sizes match
   the hunk headers and the diffstat, but the indentation inside the hunks is
   reconstructed: re-check it against the target tree before applying.
 * garlic/pp/check.nix: the final `ln -s` had its operands swapped
   (`ln -s $out ${resultTree}`), which never creates $out. It now makes $out
   a symlink to the checked result tree.
 * garlic/pp/result2.nix: $expList is now quoted when counting lines. Without
   the quotes the shell split the list into words and echo printed them on a
   single line, so N was always 1 and the "multiple results" abort could not
   trigger.
 * Both fixes keep the line counts unchanged; the "index" lines of those two
   files still carry the blob ids of the unfixed content.

 garlic/exp/nbody/granularity-mpi.nix | 19 ++++---
 garlic/pp/check.nix                  | 33 ++++++++++++
 garlic/pp/fetch.nix                  | 10 +++-
 garlic/pp/result2.nix                | 81 ++++++++++++++++++++++++++++
 garlic/stages/experiment.nix         |  2 +-
 garlic/stages/runexp/runexp          |  6 +--
 overlay.nix                          | 18 ++++---
 7 files changed, 149 insertions(+), 20 deletions(-)
 create mode 100644 garlic/pp/check.nix
 create mode 100644 garlic/pp/result2.nix

diff --git a/garlic/exp/nbody/granularity-mpi.nix b/garlic/exp/nbody/granularity-mpi.nix
index 9b152a3..d226138 100644
--- a/garlic/exp/nbody/granularity-mpi.nix
+++ b/garlic/exp/nbody/granularity-mpi.nix
@@ -25,7 +25,7 @@ let
     gitBranch = "garlic/mpi+send";
 
     # Repeat the execution of each unit 30 times
-    loops = 30;
+    loops = 10;
 
     # Resources
     qos = "debug";
@@ -34,6 +34,10 @@ let
     time = "02:00:00";
     cpuBind = "sockets,verbose";
     jobName = "nbody-bs-${toString blocksize}-${gitBranch}";
+
+    # Experiment revision: this allows a user to run again a experiment already
+    # executed
+    rev = 0;
   };
 
   # Compute the array of configurations
@@ -47,12 +51,13 @@ let
   };
 
   program = {nextStage, conf, ...}: with conf;
-  let
-    customPkgs = stdexp.replaceMpi conf.mpi;
-  in
-    customPkgs.apps.nbody.override {
-      inherit cc blocksize mpi gitBranch;
-    };
+  # FIXME: This is becoming very slow:
+  #let
+  #  customPkgs = stdexp.replaceMpi conf.mpi;
+  #in
+  bsc.garlic.apps.nbody.override {
+    inherit cc blocksize mpi gitBranch;
+  };
 
   pipeline = stdexp.stdPipeline ++ [ exec program ];
 
diff --git a/garlic/pp/check.nix b/garlic/pp/check.nix
new file mode 100644
index 0000000..dc67860
--- /dev/null
+++ b/garlic/pp/check.nix
@@ -0,0 +1,33 @@
+{
+  stdenv
+}:
+
+resultTree:
+
+stdenv.mkDerivation {
+  name = "check";
+  preferLocalBuild = true;
+  phases = [ "installPhase" ];
+  installPhase = ''
+    echo "checking result tree: ${resultTree}"
+    cd ${resultTree}
+    for exp in *-experiment; do
+      cd ${resultTree}/$exp
+      echo "$exp: checking units"
+      for unit in *-unit; do
+        cd ${resultTree}/$exp/$unit
+        if [ ! -e status ]; then
+          echo "missing $unit/status file, aborting"
+          exit 1
+        fi
+        st=$(cat status)
+        if [ "$st" != "completed" ]; then
+          echo "unit $unit is not complete yet, aborting"
+          exit 1
+        fi
+      done
+      echo "$exp: execution complete"
+    done
+    ln -s ${resultTree} $out
+  '';
+}
diff --git a/garlic/pp/fetch.nix b/garlic/pp/fetch.nix
index feeedda..d7f42a8 100644
--- a/garlic/pp/fetch.nix
+++ b/garlic/pp/fetch.nix
@@ -31,14 +31,17 @@ in
     name = "fetch";
     preferLocalBuild = true;
 
-    buildInputs = [ rsync openssh curl ];
+    buildInputs = [ rsync openssh curl nix ];
     phases = [ "installPhase" ];
+    # This doesn't work when multiple users have different directories where the
+    # results are stored.
+    #src = /. + "${prefix}${experimentName}";
 
     installPhase = ''
       cat > $out << EOF
       #!/bin/sh -e
       mkdir -p ${garlicTemp}
-      export PATH=${rsync}/bin:${openssh}/bin:${nix}/bin
+      export PATH=$PATH
       rsync -av \
         --copy-links \
         ${rsyncFilter} \
@@ -50,8 +53,11 @@ in
           garlicTemp = "${garlicTemp}"; \
         })')
 
+      rm -rf ${garlicTemp}/${experimentName}
+
       echo "The results for experiment ${experimentName} are at:"
       echo " \$res"
+
       EOF
       chmod +x $out
     '';
diff --git a/garlic/pp/result2.nix b/garlic/pp/result2.nix
new file mode 100644
index 0000000..88a125f
--- /dev/null
+++ b/garlic/pp/result2.nix
@@ -0,0 +1,81 @@
+{
+  stdenv
+, garlicTools
+}:
+
+{
+  trebuchetStage
+, experimentStage
+, garlicTemp
+}:
+
+with garlicTools;
+
+let
+  experimentName = baseNameOf (toString experimentStage);
+  garlicOut = "/mnt/garlic-out";
+in
+  stdenv.mkDerivation {
+    name = "result";
+    preferLocalBuild = true;
+    __noChroot = true;
+
+    phases = [ "installPhase" ];
+
+    installPhase = ''
+      expList=$(find ${garlicOut} -maxdepth 2 -name ${experimentName})
+
+      if [ -z "$expList" ]; then
+        echo "ERROR: missing results for ${experimentName}"
+        echo "Execute it by running:"
+        echo
+        echo -e " \e[30;48;5;2m${trebuchetStage}\e[0m"
+        echo
+        echo "cannot continue building $out, aborting"
+        exit 1
+      fi
+
+      N=$(echo "$expList" | wc -l)
+      echo "Found $N results: $expList"
+
+      if [ $N -gt 1 ]; then
+        echo
+        echo "ERROR: multiple results for ${experimentName}:"
+        echo "$expList"
+        echo
+        echo "cannot continue building $out, aborting"
+        exit 1
+      fi
+
+      exp=$expList
+      repeat=1
+      while [ 1 ]; do
+        repeat=0
+        cd $exp
+        echo "$exp: checking units"
+        for unit in *-unit; do
+          cd $exp/$unit
+          if [ ! -e status ]; then
+            echo "$unit: no status"
+            repeat=1
+          else
+            st=$(cat status)
+            echo "$unit: $st"
+            if [ "$st" != "completed" ]; then
+              repeat=1
+            fi
+          fi
+        done
+
+        if [ $repeat -eq 0 ]; then
+          break
+        fi
+        echo "waiting 10 seconds to try again"
+        sleep 10
+      done
+      echo "$exp: execution complete"
+
+      mkdir -p $out
+      cp -aL $exp $out
+    '';
+  }
diff --git a/garlic/stages/experiment.nix b/garlic/stages/experiment.nix
index 95535e8..205d72a 100644
--- a/garlic/stages/experiment.nix
+++ b/garlic/stages/experiment.nix
@@ -38,7 +38,7 @@ stdenv.mkDerivation {
     export GARLIC_EXPERIMENT=$(basename $out)
 
     if [ -e "\$GARLIC_EXPERIMENT" ]; then
-      >&2 echo "skipping, experiment path already exists: \$GARLIC_EXPERIMENT"
+      >&2 echo "skipping, path exists: \$(pwd)/\$GARLIC_EXPERIMENT"
       exit 0
     fi
 
diff --git a/garlic/stages/runexp/runexp b/garlic/stages/runexp/runexp
index 1d61515..512c7b5 100755
--- a/garlic/stages/runexp/runexp
+++ b/garlic/stages/runexp/runexp
@@ -5,13 +5,13 @@ if [ -e /nix ]; then
   exit 1
 fi
 
->&2 echo Running runexp for MN4
->&2 echo PATH=$PATH
+#>&2 echo Running runexp for MN4
+#>&2 echo PATH=$PATH
 
 user=$(id -un)
 group=$(id -gn)
 
-export GARLIC_OUT="/gpfs/projects/$group/$user/garlic-out"
+export GARLIC_OUT="/gpfs/projects/bsc15/garlic/out/$user"
 mkdir -p "$GARLIC_OUT"
 cd "$GARLIC_OUT"
 
diff --git a/overlay.nix b/overlay.nix
index 43abed0..610c7e2 100644
--- a/overlay.nix
+++ b/overlay.nix
@@ -314,15 +314,14 @@ let
 
       # Post processing tools
       pp = rec {
-        getExpResult = callPackage ./garlic/pp/result.nix {
-          inherit fetchExperiment;
+        getExpResult = callPackage ./garlic/pp/result2.nix {
         };
-        resultFromTrebuchet = trebuchetStage: getExpResult {
+        resultFromTrebuchet = trebuchetStage: (getExpResult {
           garlicTemp = "/tmp/garlic-temp";
           inherit trebuchetStage;
           experimentStage = with self.bsc.garlicTools;
             getExperimentStage trebuchetStage;
-        };
+        });
         fetchExperiment = callPackage ./garlic/pp/fetch.nix { };
         timetable = callPackage ./garlic/pp/timetable.nix { };
         rPlot = callPackage ./garlic/pp/rplot.nix { };
@@ -337,9 +336,14 @@ let
       # Datasets used in the figures
       ds = with self.bsc.garlic; with pp; {
         nbody = with exp.nbody; {
-          test = merge [ baseline ];
-          jemalloc = merge [ baseline jemalloc ];
-          freeCpu = merge [ baseline freeCpu ];
+          test = merge [ test ];
+          baseline = merge [ baseline ];
+          jemalloc = merge [ jemalloc ];
+          freeCpu = merge [ freeCpu ];
+          cmp = {
+            jemalloc = merge [ baseline jemalloc ];
+            freeCpu = merge [ baseline freeCpu ];
+          };
         };
       };
 