WIP isolation

This commit is contained in:
Rodrigo Arias 2020-10-07 09:49:42 +02:00
parent ba221c5200
commit 4ea0d16926
7 changed files with 101 additions and 31 deletions

View File

@ -16,13 +16,15 @@ let
varConfig = { varConfig = {
cc = [ bsc.icc ]; cc = [ bsc.icc ];
mpi = [ bsc.impi ]; mpi = [ bsc.impi ];
#mpi = [ bsc.mpichDebug ];
blocksize = [ 1024 ]; blocksize = [ 1024 ];
}; };
# Common configuration # Common configuration
common = { common = {
# Compile time nbody config # Compile time nbody config
gitBranch = "garlic/tampi+send+oss+task"; gitBranch = "garlic/mpi+send";
#gitBranch = "garlic/tampi+send+oss+task";
# nbody runtime options # nbody runtime options
particles = 1024*4; particles = 1024*4;
@ -30,15 +32,16 @@ let
# Resources # Resources
ntasksPerNode = "2"; ntasksPerNode = "2";
nodes = "2"; nodes = "1";
# Stage configuration # Stage configuration
enableRunexp = true; enableTrebuchet = true;
enableSbatch = true; enableSbatch = true;
enableControl = true; enableControl = true;
enableExtrae = false; enableExtrae = false;
enablePerf = false; enablePerf = false;
enableCtf = false; enableCtf = false;
enableStrace = true;
# MN4 path # MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix"; nixPrefix = "/gpfs/projects/bsc15/nix";
@ -90,6 +93,11 @@ let
perfArgs = "sched record -a"; perfArgs = "sched record -a";
}; };
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
nixsetup = "${nixPrefix}/bin/nix-setup";
};
isolate = {stage, conf, ...}: with conf; w.isolate { isolate = {stage, conf, ...}: with conf; w.isolate {
program = stageProgram stage; program = stageProgram stage;
clusterName = "mn4"; clusterName = "mn4";
@ -110,8 +118,23 @@ let
''; '';
}; };
strace = {stage, conf, ...}: w.strace {
program = stageProgram stage;
};
argv = {stage, conf, ...}: w.argv { argv = {stage, conf, ...}: w.argv {
program = stageProgram stage; program = stageProgram stage;
env = ''
#export I_MPI_PMI_LIBRARY=${bsc.slurm17-libpmi2}/lib/libpmi2.so
export I_MPI_DEBUG=+1000
#export I_MPI_FABRICS=shm
export MPICH_DBG_OUTPUT=VERBOSE
export MPICH_DBG_CLASS=ALL
export MPICH_DBG_OUTPUT=stdout
export FI_LOG_LEVEL=Info
'';
argv = ''( -t ${toString conf.timesteps} argv = ''( -t ${toString conf.timesteps}
-p ${toString conf.particles} )''; -p ${toString conf.particles} )'';
}; };
@ -146,19 +169,25 @@ let
}; };
stages = with common; [] stages = with common; []
# Launch the experiment remotely
#++ optional enableRunexp runexp
# Use sbatch to request resources first # Use sbatch to request resources first
++ optional enableSbatch sbatch ++ optionals enableSbatch [
sbatch
nixsetup
#isolate
]
# Repeats the next stages N times # Repeats the next stages N times
++ optionals enableControl [ isolate control ] ++ optional enableControl control
# Executes srun to launch the program in the requested nodes, and # Executes srun to launch the program in the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches # immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace. # the next stages from outside the namespace.
++ [ srun isolate ] ++ [
#strace
srun
nixsetup
#isolate
]
# Instrumentation with extrae # Instrumentation with extrae
++ optional enableExtrae extrae ++ optional enableExtrae extrae
@ -169,6 +198,9 @@ let
# Optionally profile nanos6 with the new ctf # Optionally profile nanos6 with the new ctf
++ optional enableCtf ctf ++ optional enableCtf ctf
# Optionally enable strace
#++ optional enableStrace strace
# Execute the nbody app with the argv and env vars # Execute the nbody app with the argv and env vars
++ [ argv nbodyFn ]; ++ [ argv nbodyFn ];
@ -177,7 +209,7 @@ let
launcher = launch jobs; launcher = launch jobs;
runexp = stage: w.runexp { trebuchet = stage: w.trebuchet {
program = stageProgram stage; program = stageProgram stage;
nixPrefix = common.nixPrefix; nixPrefix = common.nixPrefix;
}; };
@ -187,8 +219,7 @@ let
conf = common; conf = common;
}; };
final = runexp (isolatedRun launcher); final = trebuchet (isolatedRun launcher);
in in
# We simply run each program one after another # We simply run each program one after another

View File

@ -16,7 +16,7 @@ with stdenv.lib;
stdenv.mkDerivation rec { stdenv.mkDerivation rec {
name = "nbody"; name = "nbody";
#src = /home/Computational/rarias/bscpkgs/manual/nbody; #src = ~/nbody;
src = builtins.fetchGit { src = builtins.fetchGit {
url = "${gitURL}"; url = "${gitURL}";

View File

@ -16,16 +16,31 @@ nixjoin="@nixPrefix@@nixtools@/bin/nix-join"
env=( env=(
PATH="@nixPrefix@@busybox@/bin:@busybox@/bin:@extraPath@" PATH="@nixPrefix@@busybox@/bin:@busybox@/bin:@extraPath@"
$(env | grep ^SLURM || true) $(env | grep ^SLURM || true)
$(env | grep ^PMI || true)
$(env | grep ^GARLIC_OUT || true) $(env | grep ^GARLIC_OUT || true)
$(env | grep ^USER || true)
HOME="/homeless-shelter"
) )
#-m @nixPrefix@ \ mounts=(
join_flags="-m /etc \ #-m @nixPrefix@
-m /.statelite/tmpfs/etc \ #FIXME: Use only the strictly necessary from /etc
-m /sys \ -m /etc
-m /var/run/munge \ # The /etc/hosts file is a symlink to this etc/
-m /gpfs/projects/bsc15 \ -m /.statelite/tmpfs/etc
-m /bin:@nixPrefix@@busybox@/bin" -m /sys
-m /dev
-m /proc
# nscd cache: doesn't exist (?)
#-m /var/run/nscd
# Needed for munge auth
-m /var/run/munge
# FIXME: We should only need nix and the output path
-m /gpfs/projects/bsc15
-m /bin:@nixPrefix@@busybox@/bin
)
join_flags="${mounts[@]}"
exec $nixjoin -v -i $join_flags $nixhome -- \ exec $nixjoin -v -i $join_flags $nixhome -- \
env -i "${env[@]}" @out@/bin/stage2 env -i "${env[@]}" @out@/bin/stage2

23
garlic/stages/strace.nix Normal file
View File

@ -0,0 +1,23 @@
# Stage that wraps the next program so it runs under strace.
#
# First argument set: packages this stage takes as inputs. `bash` is accepted
# but not referenced anywhere in this file.
{ stdenv, bash, strace }:

# Second argument set: `program` is the path of the executable to be traced.
{ program }:

stdenv.mkDerivation {
  name = "strace";

  # Hint to Nix that this derivation should be built locally.
  preferLocalBuild = true;

  # No `src` is given, so only the install phase is run.
  phases = [ "installPhase" ];

  # Write a small /bin/sh script directly to $out and mark it executable.
  # ${strace} and ${program} are interpolated by Nix; $out is expanded by the
  # builder shell. The script replaces itself (exec) with strace, and `-f`
  # makes strace follow child processes as well.
  installPhase = ''
    cat > $out <<EOF
    #!/bin/sh
    exec ${strace}/bin/strace -f ${program}
    EOF
    chmod +x $out
  '';
}

View File

@ -11,15 +11,15 @@
}: }:
stdenv.mkDerivation { stdenv.mkDerivation {
name = "runexp"; name = "trebuchet";
preferLocalBuild = true; preferLocalBuild = true;
phases = [ "unpackPhase" "installPhase" ]; phases = [ "unpackPhase" "installPhase" ];
dontPatchShebangs = true; dontPatchShebangs = true;
src = ./.; src = ./.;
inherit sshHost nixPrefix nixtools targetCluster program; inherit sshHost nixPrefix nixtools targetCluster program;
installPhase = '' installPhase = ''
substituteAllInPlace runexp substituteAllInPlace trebuchet
cp runexp $out cp trebuchet $out
chmod +x $out chmod +x $out
''; '';
} }

View File

@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh -ex
# @upload-to-mn@ # @upload-to-mn@
# This program runs the current experiment in the ./result symlink in # This program runs the current experiment in the ./result symlink in

View File

@ -28,7 +28,7 @@ let
mpich = callPackage ./bsc/mpich/default.nix { }; mpich = callPackage ./bsc/mpich/default.nix { };
mpichDebug = self.mpich.override { enableDebug = true; }; mpichDebug = self.bsc.mpich.override { enableDebug = true; };
# Updated version of libpsm2: TODO push upstream. # Updated version of libpsm2: TODO push upstream.
#libpsm2 = callPackage ./bsc/libpsm2/default.nix { }; #libpsm2 = callPackage ./bsc/libpsm2/default.nix { };
@ -207,7 +207,8 @@ let
envRecord = callPackage ./garlic/stages/envRecord.nix { }; envRecord = callPackage ./garlic/stages/envRecord.nix { };
valgrind = callPackage ./garlic/stages/valgrind.nix { }; valgrind = callPackage ./garlic/stages/valgrind.nix { };
isolate = callPackage ./garlic/stages/isolate { }; isolate = callPackage ./garlic/stages/isolate { };
runexp = callPackage ./garlic/stages/runexp { }; trebuchet = callPackage ./garlic/stages/trebuchet { };
strace = callPackage ./garlic/stages/strace.nix { };
}; };
# Tests (move to bsc ?) # Tests (move to bsc ?)