From a38ff31ccabb2a3a189634a43c12c22eccc3fcbb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 13 Oct 2020 13:00:59 +0200 Subject: [PATCH] Introduce the runexp stage --- garlic/stages/experiment.nix | 15 +++++++++++++++ garlic/stages/runexp/default.nix | 29 +++++++++++++++++++++++++++++ garlic/stages/runexp/runexp | 18 ++++++++++++++++++ garlic/stages/sbatch.nix | 16 +++++----------- garlic/stages/trebuchet.nix | 18 +++++++----------- garlic/stdexp.nix | 11 +++++++---- overlay.nix | 1 + 7 files changed, 82 insertions(+), 26 deletions(-) create mode 100644 garlic/stages/runexp/default.nix create mode 100755 garlic/stages/runexp/runexp diff --git a/garlic/stages/experiment.nix b/garlic/stages/experiment.nix index 50ee2be..3ad4715 100644 --- a/garlic/stages/experiment.nix +++ b/garlic/stages/experiment.nix @@ -25,6 +25,21 @@ stdenv.mkDerivation { cat > $out << EOF #!/bin/sh + if [ -z "\$GARLIC_OUT" ]; then + >&2 echo "GARLIC_OUT not defined, aborting" + exit 1 + fi + + export GARLIC_EXPERIMENT=$(basename $out) + + if [ -e "\$GARLIC_EXPERIMENT" ]; then + >&2 echo "Already exists \$GARLIC_EXPERIMENT, aborting" + exit 1 + fi + + mkdir -p "\$GARLIC_EXPERIMENT" + cd "\$GARLIC_EXPERIMENT" + # This is an experiment formed by the following units: ${unitsString} EOF diff --git a/garlic/stages/runexp/default.nix b/garlic/stages/runexp/default.nix new file mode 100644 index 0000000..991a7d8 --- /dev/null +++ b/garlic/stages/runexp/default.nix @@ -0,0 +1,29 @@ +{ + stdenv +, garlicTools +}: + +{ + nextStage +, nixPrefix +}: + +with garlicTools; + +stdenv.mkDerivation { + name = "runexp"; + preferLocalBuild = true; + phases = [ "unpackPhase" "installPhase" ]; + src = ./.; + dontPatchShebangs = true; + programPath = "/bin/runexp"; + inherit nixPrefix nextStage; + program = stageProgram nextStage; + installPhase = '' + substituteAllInPlace runexp + + mkdir -p $out/bin + cp runexp $out/bin/ + chmod +x $out/bin/runexp + ''; +} diff --git a/garlic/stages/runexp/runexp b/garlic/stages/runexp/runexp new file mode 100755 index 0000000..5b929a5 --- /dev/null +++ b/garlic/stages/runexp/runexp @@ -0,0 +1,18 @@ +#!/bin/sh -ex + +if [ -e /nix ]; then + >&2 echo "Cannot use runexp inside nix environment!" + exit 1 +fi + +>&2 echo Running runexp for MN4 +>&2 echo PATH=$PATH + +user=$(id -un) +group=$(id -gn) + +export GARLIC_OUT="/gpfs/projects/$group/$user/garlic-out" +mkdir -p "$GARLIC_OUT" +cd "$GARLIC_OUT" + +exec @nixPrefix@@program@ diff --git a/garlic/stages/sbatch.nix b/garlic/stages/sbatch.nix index 4bd49fd..74f696d 100644 --- a/garlic/stages/sbatch.nix +++ b/garlic/stages/sbatch.nix @@ -8,7 +8,7 @@ { nextStage , jobName -, chdirPrefix ? "." +, chdir ? "." , nixPrefix ? "" , binary ? "/bin/run" , ntasks ? null @@ -49,7 +49,7 @@ stdenv.mkDerivation rec { #SBATCH --cpus-per-task=1 dontBuild = true; dontPatchShebangs = true; - programPath = "/${name}"; + programPath = "/run"; installPhase = '' mkdir -p $out @@ -61,7 +61,7 @@ stdenv.mkDerivation rec { + sbatchOpt "ntasks-per-node" ntasksPerNode + sbatchOpt "ntasks-per-socket" ntasksPerSocket + sbatchOpt "nodes" nodes - + sbatchOpt "chdir" "${chdirPrefix}/$(basename $out)" + + sbatchOpt "chdir" chdir + sbatchOpt "output" output + sbatchOpt "error" error + sbatchEnable "exclusive" exclusive @@ -75,16 +75,10 @@ stdenv.mkDerivation rec { exec ${nixPrefix}${stageProgram nextStage} EOF - cat > $out/${name} < $out/run <&2 echo "Execution aborted: '${chdirPrefix}/$(basename $out)' already exists" - exit 1 - fi - mkdir -p "${chdirPrefix}/$(basename $out)" - echo ${slurm}/bin/sbatch ${nixPrefix}$out/job ${slurm}/bin/sbatch ${nixPrefix}$out/job EOF - chmod +x $out/${name} + chmod +x $out/run ''; } diff --git a/garlic/stages/trebuchet.nix b/garlic/stages/trebuchet.nix index 113b154..045ab51 100644 --- a/garlic/stages/trebuchet.nix +++ b/garlic/stages/trebuchet.nix @@ -1,6 +1,5 @@ { stdenv -, nixtools , garlicTools }: @@ -29,18 +28,15 @@ stdenv.mkDerivation { #!/bin/sh -e # Using the token @upload-to-mn@ we instruct the post-build hook to upload # this script and it's closure to the MN4 cluster, so it can run there. + + # This trebuchet launches: + # ${nextStage} + # ${nextStage.nextStage} + # ${nextStage.nextStage.nextStage} + # Take a look at ${program} # to see what is being executed. - - # This trebuchet launches the following experiment in an isolated - # environment: - # ${nextStage.nextStage} - - nixtools=${nixPrefix}${nixtools}/bin - runexp=\$nixtools/${targetCluster}/runexp - - >&2 echo "Launching \"\$runexp ${program}\" in MN4" - ssh ${sshHost} \$runexp ${program} + ssh ${sshHost} ${nixPrefix}${program} EOF chmod +x $out ''; diff --git a/garlic/stdexp.nix b/garlic/stdexp.nix index e456c76..880cdcd 100644 --- a/garlic/stdexp.nix +++ b/garlic/stdexp.nix @@ -14,13 +14,16 @@ let in rec { /* Takes a list of units and builds an experiment, after executing the - trebuchet and the isolate stages. Returns the trebuchet stage. */ + trebuchet, runexp and isolate stages. Returns the trebuchet stage. */ buildTrebuchet = units: stages.trebuchet { inherit (machineConf) nixPrefix; - nextStage = stages.isolate { + nextStage = stages.runexp { inherit (machineConf) nixPrefix; - nextStage = stages.experiment { - inherit units; + nextStage = stages.isolate { + inherit (machineConf) nixPrefix; + nextStage = stages.experiment { + inherit units; + }; }; }; }; diff --git a/overlay.nix b/overlay.nix index a205878..cb4108a 100644 --- a/overlay.nix +++ b/overlay.nix @@ -212,6 +212,7 @@ let extrae = callPackage ./garlic/stages/extrae.nix { }; valgrind = callPackage ./garlic/stages/valgrind.nix { }; isolate = callPackage ./garlic/stages/isolate { }; + runexp = callPackage ./garlic/stages/runexp { }; trebuchet = callPackage ./garlic/stages/trebuchet.nix { }; strace = callPackage ./garlic/stages/strace.nix { }; unit = callPackage ./garlic/stages/unit.nix { };