From 4ea0d16926fbcb48ce6cea6be11a0bc07cc3da80 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 7 Oct 2020 09:49:42 +0200 Subject: [PATCH] WIP isolation --- garlic/exp/nbody/tampi.nix | 65 ++++++++++++++----- garlic/nbody/default.nix | 2 +- garlic/stages/isolate/stage1 | 29 +++++++-- garlic/stages/strace.nix | 23 +++++++ .../stages/{runexp => trebuchet}/default.nix | 6 +- .../{runexp/runexp => trebuchet/trebuchet} | 2 +- overlay.nix | 5 +- 7 files changed, 101 insertions(+), 31 deletions(-) create mode 100644 garlic/stages/strace.nix rename garlic/stages/{runexp => trebuchet}/default.nix (80%) rename garlic/stages/{runexp/runexp => trebuchet/trebuchet} (96%) diff --git a/garlic/exp/nbody/tampi.nix b/garlic/exp/nbody/tampi.nix index 398d4b9..a29280c 100644 --- a/garlic/exp/nbody/tampi.nix +++ b/garlic/exp/nbody/tampi.nix @@ -16,13 +16,15 @@ let varConfig = { cc = [ bsc.icc ]; mpi = [ bsc.impi ]; + #mpi = [ bsc.mpichDebug ]; blocksize = [ 1024 ]; }; # Common configuration common = { # Compile time nbody config - gitBranch = "garlic/tampi+send+oss+task"; + gitBranch = "garlic/mpi+send"; + #gitBranch = "garlic/tampi+send+oss+task"; # nbody runtime options particles = 1024*4; @@ -30,15 +32,16 @@ let # Resources ntasksPerNode = "2"; - nodes = "2"; + nodes = "1"; # Stage configuration - enableRunexp = true; - enableSbatch = true; - enableControl = true; - enableExtrae = false; - enablePerf = false; - enableCtf = false; + enableTrebuchet = true; + enableSbatch = true; + enableControl = true; + enableExtrae = false; + enablePerf = false; + enableCtf = false; + enableStrace = true; # MN4 path nixPrefix = "/gpfs/projects/bsc15/nix"; @@ -90,6 +93,11 @@ let perfArgs = "sched record -a"; }; + nixsetup = {stage, conf, ...}: with conf; w.nixsetup { + program = stageProgram stage; + nixsetup = "${nixPrefix}/bin/nix-setup"; + }; + isolate = {stage, conf, ...}: with conf; w.isolate { program = stageProgram stage; clusterName = "mn4"; @@ -110,8 +118,23 @@ let ''; }; + 
strace = {stage, conf, ...}: w.strace { + program = stageProgram stage; + }; + argv = {stage, conf, ...}: w.argv { program = stageProgram stage; + env = '' + #export I_MPI_PMI_LIBRARY=${bsc.slurm17-libpmi2}/lib/libpmi2.so + export I_MPI_DEBUG=+1000 + #export I_MPI_FABRICS=shm + + export MPICH_DBG_LEVEL=VERBOSE + export MPICH_DBG_CLASS=ALL + export MPICH_DBG_OUTPUT=stdout + + export FI_LOG_LEVEL=Info + ''; argv = ''( -t ${toString conf.timesteps} -p ${toString conf.particles} )''; }; @@ -146,19 +169,25 @@ let }; stages = with common; [] - # Launch the experiment remotely - #++ optional enableRunexp runexp - # Use sbatch to request resources first - ++ optional enableSbatch sbatch + ++ optionals enableSbatch [ + sbatch + nixsetup + #isolate + ] # Repeats the next stages N times - ++ optionals enableControl [ isolate control ] + ++ optional enableControl control # Executes srun to launch the program in the requested nodes, and # immediately after enters the nix environment again, as slurmstepd launches # the next stages from outside the namespace. 
- ++ [ srun isolate ] + ++ [ + #strace + srun + nixsetup + #isolate + ] # Intrumentation with extrae ++ optional enableExtrae extrae @@ -169,6 +198,9 @@ let # Optionally profile nanos6 with the new ctf ++ optional enableCtf ctf + # Optionally enable strace + #++ optional enableStrace strace + # Execute the nbody app with the argv and env vars ++ [ argv nbodyFn ]; @@ -177,7 +209,7 @@ let launcher = launch jobs; - runexp = stage: w.runexp { + trebuchet = stage: w.trebuchet { program = stageProgram stage; nixPrefix = common.nixPrefix; }; @@ -187,8 +219,7 @@ let conf = common; }; - final = runexp (isolatedRun launcher); - + final = trebuchet (isolatedRun launcher); in # We simply run each program one after another diff --git a/garlic/nbody/default.nix b/garlic/nbody/default.nix index 9c7614d..70ece4b 100644 --- a/garlic/nbody/default.nix +++ b/garlic/nbody/default.nix @@ -16,7 +16,7 @@ with stdenv.lib; stdenv.mkDerivation rec { name = "nbody"; - #src = /home/Computational/rarias/bscpkgs/manual/nbody; + #src = ~/nbody; src = builtins.fetchGit { url = "${gitURL}"; diff --git a/garlic/stages/isolate/stage1 b/garlic/stages/isolate/stage1 index ee8b4d8..02ed71b 100644 --- a/garlic/stages/isolate/stage1 +++ b/garlic/stages/isolate/stage1 @@ -16,16 +16,31 @@ nixjoin="@nixPrefix@@nixtools@/bin/nix-join" env=( PATH="@nixPrefix@@busybox@/bin:@busybox@/bin:@extraPath@" $(env | grep ^SLURM || true) + $(env | grep ^PMI || true) $(env | grep ^GARLIC_OUT || true) + $(env | grep ^USER || true) + HOME="/homeless-shelter" ) -#-m @nixPrefix@ \ -join_flags="-m /etc \ - -m /.statelite/tmpfs/etc \ - -m /sys \ - -m /var/run/munge \ - -m /gpfs/projects/bsc15 \ - -m /bin:@nixPrefix@@busybox@/bin" +mounts=( + #-m @nixPrefix@ + #FIXME: Use only the strictly necessary from /etc + -m /etc + # The /etc/hosts file is a symlink to this etc/ + -m /.statelite/tmpfs/etc + -m /sys + -m /dev + -m /proc + # nscd cache: doesn't exist (?) 
+ #-m /var/run/nscd + # Needed for munge auth + -m /var/run/munge + # FIXME: We should only need nix and the output path + -m /gpfs/projects/bsc15 + -m /bin:@nixPrefix@@busybox@/bin +) + +join_flags="${mounts[@]}" exec $nixjoin -v -i $join_flags $nixhome -- \ env -i "${env[@]}" @out@/bin/stage2 diff --git a/garlic/stages/strace.nix b/garlic/stages/strace.nix new file mode 100644 index 0000000..56eb932 --- /dev/null +++ b/garlic/stages/strace.nix @@ -0,0 +1,23 @@ +{ + stdenv +, bash +, strace +}: + +{ + program +}: + +stdenv.mkDerivation { + name = "strace"; + preferLocalBuild = true; + phases = [ "installPhase" ]; + installPhase = '' + cat > $out <