New stage design

This commit is contained in:
Rodrigo Arias 2020-09-02 17:07:09 +02:00
parent d469ccd59d
commit 8110bc2976
12 changed files with 221 additions and 150 deletions

View File

@ -4,28 +4,25 @@
}:
{
app
program
, env ? ""
, argv # bash array as string, example: argv=''(-f "file with spaces" -t 10)''
, program ? "bin/run"
}:
stdenv.mkDerivation {
inherit argv;
name = "${app.name}-argv";
name = "argv";
preferLocalBuild = true;
phases = [ "installPhase" ];
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!${bash}/bin/bash
# Requires /nix to use bash
${env}
argv=${argv}
exec ${app}/${program} \''${argv[@]}
exec ${program} \''${argv[@]}
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

View File

@ -2,23 +2,23 @@
stdenv
}:
program:
{
program
}:
stdenv.mkDerivation {
inherit program;
name = "${program.name}-control";
name = "control";
preferLocalBuild = true;
phases = [ "installPhase" ];
dontPatchShebangs = true;
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!/bin/sh
#set -e
for n in {1..30}; do
$program/bin/run
${program}
done
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

View File

@ -25,14 +25,17 @@ let
gitBranch = "garlic/seq";
};
sbatchWrapper = callPackage ./sbatch.nix { };
srunWrapper = callPackage ./srun.nix { };
launchWrapper = callPackage ./launcher.nix { };
controlWrapper = callPackage ./control.nix { };
nixsetupWrapper = callPackage ./nix-setup.nix { };
argvWrapper = callPackage ./argv.nix { };
statspyWrapper = callPackage ./statspy.nix { };
extraeWrapper = callPackage ./extrae.nix { };
runWrappers = {
sbatch = callPackage ./sbatch.nix { };
srun = callPackage ./srun.nix { };
launch = callPackage ./launcher.nix { };
control = callPackage ./control.nix { };
nixsetup= callPackage ./nix-setup.nix { };
argv = callPackage ./argv.nix { };
statspy = callPackage ./statspy.nix { };
extrae = callPackage ./extrae.nix { };
stagen = callPackage ./stagen.nix { };
};
# Perf is tied to a linux kernel specific version
linuxPackages = bsc.linuxPackages_4_4;

View File

@ -1,114 +1,130 @@
{
bsc
, stdenv
, nbody
, genApp
, genConfigs
# Wrappers
, launchWrapper
, sbatchWrapper
, srunWrapper
, argvWrapper
, controlWrapper
, nixsetupWrapper
, statspyWrapper
, extraeWrapper
, perfWrapper
, runWrappers
}:
with stdenv.lib;
let
# Set the configuration for the experiment
config = {
# Set variable configuration for the experiment
varConfig = {
cc = [ bsc.icc ];
blocksize = [ 1024 ];
};
extraConfig = {
# Common configuration
common = {
# Compile time nbody config
gitBranch = "garlic/mpi+send";
mpi = bsc.impi;
# nbody runtime options
particles = 1024*128;
timesteps = 100;
timesteps = 20;
# Resources
ntasksPerNode = "48";
nodes = "1";
time = "02:00:00";
qos = "debug";
# Stage configuration
enableSbatch = true;
enableControl = true;
enableExtrae = false;
enablePerf = false;
# MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix";
};
# Compute the cartesian product of all configurations
allConfigs = genConfigs config;
filteredConfigs = with builtins; filter (c: c.blocksize <= 4096) allConfigs;
configs = map (conf: conf // extraConfig) filteredConfigs;
configs = map (conf: conf // common) (genConfigs varConfig);
sbatch = conf: app: with conf; sbatchWrapper {
app = app;
nixPrefix = "/gpfs/projects/bsc15/nix";
stageProgram = stage:
if stage ? programPath
then "${stage}${stage.programPath}" else "${stage}";
w = runWrappers;
sbatch = {stage, conf, ...}: with conf; w.sbatch {
program = stageProgram stage;
exclusive = true;
inherit ntasksPerNode nodes time qos;
time = "02:00:00";
qos = "debug";
jobName = "nbody-bs";
inherit nixPrefix nodes ntasksPerNode;
};
srun = app: srunWrapper {
app = app;
nixPrefix = "/gpfs/projects/bsc15/nix";
control = {stage, conf, ...}: with conf; w.control {
program = stageProgram stage;
};
srun = {stage, conf, ...}: with conf; w.srun {
program = stageProgram stage;
srunOptions = "--cpu-bind=verbose,rank";
inherit nixPrefix;
};
argv = conf: app:
with conf;
argvWrapper {
app = app;
statspy = {stage, conf, ...}: with conf; w.statspy {
program = stageProgram stage;
};
perf = {stage, conf, ...}: with conf; w.perf {
program = stageProgram stage;
perfArgs = "sched record -a";
};
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
};
extrae = {stage, conf, ...}: w.extrae {
program = stageProgram stage;
traceLib = "mpi"; # mpi -> libtracempi.so
configFile = ./extrae.xml;
};
argv = {stage, conf, ...}: w.argv {
program = stageProgram stage;
env = ''
set -e
export I_MPI_THREAD_SPLIT=1
'';
argv = ''(-t ${toString timesteps} -p ${toString particles})'';
argv = ''( -t ${toString conf.timesteps}
-p ${toString conf.particles} )'';
};
statspy = app:
statspyWrapper {
app = app;
nbodyFn = {stage, conf, ...}: with conf; nbody.override {
inherit cc blocksize mpi gitBranch;
};
extrae = app:
extraeWrapper {
app = app;
traceLib = "mpi";
configFile = ./extrae.xml;
};
stages = with common; []
# Use sbatch to request resources first
++ optional enableSbatch sbatch
perf = app:
perfWrapper {
app = app;
perfArgs = "sched record -a";
};
# Repeats the next stages N times
++ optionals enableControl [ nixsetup control ]
nbodyFn = conf:
with conf;
nbody.override { inherit cc mpi blocksize gitBranch; };
# Executes srun to launch the program in the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace.
++ [ srun nixsetup ]
pipeline = conf:
# sbatch conf (
# nixsetupWrapper (
# controlWrapper (
srun (
nixsetupWrapper (
# extrae (
# perf (
argv conf (
nbodyFn conf
)
# )
# )
)
)
# )
# )
# )
;
# Instrumentation with extrae
++ optional enableExtrae extrae
# Ideally it should look like this:
#pipeline = sbatch nixsetup control argv nbodyFn;
# Optionally profile the next stages with perf
++ optional enablePerf perf
jobs = map pipeline configs;
# Execute the nbody app with the argv and env vars
++ [ argv nbodyFn ];
# List of actual programs to be executed
jobs = map (conf: w.stagen { inherit conf stages; }) configs;
in
launchWrapper jobs
# We simply run each program one after another
w.launch jobs

View File

@ -2,29 +2,36 @@
stdenv
, bash
, extrae
#, writeShellScriptBin
}:
{
app
, traceLib ? "mpi"
program
, configFile
, program ? "bin/run"
, traceLib
}:
#writeShellScriptBin "extraeWrapper" ''
# export EXTRAE_HOME=${extrae}
# export LD_PRELOAD=${extrae}/lib/lib${traceLib}trace.so:$LD_PRELOAD
# export EXTRAE_CONFIG_FILE=${configFile}
# exec ${program}
#''
stdenv.mkDerivation {
name = "${app.name}-extrae";
name = "extrae";
preferLocalBuild = true;
phases = [ "installPhase" ];
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!${bash}/bin/bash
# Requires /nix to use bash
export EXTRAE_HOME=${extrae}
export LD_PRELOAD=${extrae}/lib/lib${traceLib}trace.so:$LD_PRELOAD
export EXTRAE_CONFIG_FILE=${configFile}
exec ${app}/${program}
exec ${program}
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

View File

@ -12,12 +12,13 @@ with stdenv.lib;
stdenv.mkDerivation rec {
name = "nbody";
src = /home/Computational/rarias/bsc-nixpkgs/manual/nbody;
src = /home/Computational/rarias/bscpkgs/manual/nbody;
#src = builtins.fetchGit {
# url = "${gitURL}";
# ref = "${gitBranch}";
#};
programPath = "/bin/nbody";
buildInputs = [
cc
@ -38,7 +39,6 @@ stdenv.mkDerivation rec {
installPhase = ''
mkdir -p $out/bin
cp nbody* $out/bin/${name}
ln -s $out/bin/${name} $out/bin/run
'';
}

View File

@ -2,17 +2,17 @@
stdenv
}:
program:
{
program
}:
stdenv.mkDerivation {
inherit program;
name = "${program.name}-nixsetup";
name = "nixsetup";
preferLocalBuild = true;
phases = [ "installPhase" ];
dontPatchShebangs = true;
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!/bin/sh
# We need to enter the nix namespace first, in order to have /nix
@ -21,8 +21,8 @@ stdenv.mkDerivation {
exec nix-setup \$0
fi
exec $program/bin/run
exec ${program}
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

View File

@ -5,22 +5,20 @@
}:
{
app
program
, perfArgs ? "record -a"
, program ? "bin/run"
}:
stdenv.mkDerivation {
name = "${app.name}-perf";
name = "perfWrapper";
preferLocalBuild = true;
phases = [ "installPhase" ];
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!${bash}/bin/bash
exec ${perf}/bin/perf ${perfArgs} ${app}/${program}
exec ${perf}/bin/perf ${perfArgs} ${program}
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

View File

@ -4,10 +4,10 @@
}:
{
app
program
, jobName
, chdirPrefix ? "."
, nixPrefix ? ""
, argv ? ""
, binary ? "/bin/run"
, ntasks ? null
, ntasksPerNode ? null
@ -33,12 +33,9 @@ let
in
stdenv.mkDerivation rec {
name = "${app.name}-job";
name = "sbatch";
preferLocalBuild = true;
src = ./.;
buildInputs = [ app ];
phases = [ "installPhase" ];
#SBATCH --tasks-per-node=48
@ -46,12 +43,13 @@ stdenv.mkDerivation rec {
#SBATCH --cpus-per-task=1
dontBuild = true;
dontPatchShebangs = true;
programPath = "/${name}";
installPhase = ''
mkdir -p $out
cat > $out/job <<EOF
#!/bin/sh
#SBATCH --job-name="${name}"
#SBATCH --job-name="${jobName}"
''
+ sbatchOpt "ntasks" ntasks
+ sbatchOpt "ntasks-per-node" ntasksPerNode
@ -66,11 +64,10 @@ stdenv.mkDerivation rec {
+ optionalString (extra!=null) extra
+
''
exec ${nixPrefix}${app}${binary} ${argv}
exec ${nixPrefix}${program}
EOF
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out/${name} <<EOF
#!/bin/sh
if [ -e "${chdirPrefix}/$(basename $out)" ]; then
>&2 echo "Execution aborted: '${chdirPrefix}/$(basename $out)' already exists"
@ -80,6 +77,6 @@ stdenv.mkDerivation rec {
echo sbatch ${nixPrefix}$out/job
sbatch ${nixPrefix}$out/job
EOF
chmod +x $out/bin/run
chmod +x $out/${name}
'';
}

View File

@ -2,23 +2,21 @@
stdenv
}:
{
app
program
, nixPrefix ? ""
, srunOptions ? ""
}:
stdenv.mkDerivation rec {
name = "${app.name}-srun";
name = "srun";
preferLocalBuild = true;
phases = [ "installPhase" ];
buildInputs = [ app ];
dontPatchShebangs = true;
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out <<EOF
#!/bin/sh
exec srun --mpi=pmi2 ${srunOptions} ${nixPrefix}${app}/bin/run
exec srun --mpi=pmi2 ${srunOptions} ${nixPrefix}${program}
EOF
chmod +x $out/bin/run
chmod +x $out
'';
}

55
bsc/garlic/stagen.nix Normal file
View File

@ -0,0 +1,55 @@
# stagen: chains a list of stage functions into a single experiment output.
#
# Every element of `stages` is a function that is called with an attribute
# set { stage, conf } and returns a value that has a `name` attribute and can
# be interpolated into a string (optionally carrying a `programPath`
# attribute pointing at the executable inside its output). The stages are
# applied from the last one to the first one, so every stage receives the
# stage that comes right after it in the list as its `stage` argument; the
# last stage receives `stage = null`.
#
# The resulting derivation contains:
#   $out/bin/<i>-<name>        symlink to the program of the i-th stage (1-based)
#   $out/bin/<experimentName>  symlink to the link of the first stage
#   $out/config.json           `conf` serialized as pretty-printed JSON
{
  stdenv
, bash
, extrae
, writeShellScriptBin
, jq
}:

{
  # List of stage functions, in execution order (first element runs first)
  stages
  # Configuration passed unchanged to every stage function and dumped as JSON
, conf
  # Name of the entry point link created in $out/bin
, experimentName ? "run"
}:

with stdenv.lib;

let

  # Right fold over the stage functions: the accumulator carries the stage
  # built in the previous step (`prevStage`) and the list of all the stages
  # built so far, kept in execution order.
  dStages = foldr (stageFn: {conf, prevStage, stages}:
    let
      # Apply the stage function only once and reuse the result
      thisStage = stageFn { stage = prevStage; conf = conf; };
    in {
      conf = conf;
      prevStage = thisStage;
      stages = [ thisStage ] ++ stages;
    })
    { conf = conf; stages = []; prevStage = null; } stages;

  # Path of the executable of a stage: stages with a `programPath` attribute
  # keep the program inside their output, otherwise the output itself is the
  # program.
  stageProgram = stage:
    if stage ? programPath
    then "${stage}${stage.programPath}" else "${stage}";

  # One { name, path } entry per stage; the name is prefixed with the 1-based
  # position of the stage.
  linkStages = imap1 (i: s: {
    name = "${toString i}-${baseNameOf s.name}";
    path = stageProgram s;
  }) dStages.stages;

  # Shell commands creating one symlink per stage in $out/bin
  createLinks = builtins.concatStringsSep "\n"
    (map (x: "ln -s ${x.path} $out/bin/${x.name}") linkStages);

  # Name of the link of the first stage. Fail with an explicit message rather
  # than with an out-of-bounds error when there are no stages at all.
  firstStageLink =
    if builtins.length linkStages == 0
    then throw "stagen: the list of stages must contain at least one stage"
    else (x: x.name) (elemAt linkStages 0);

in

stdenv.mkDerivation {
  name = "stagen";
  preferLocalBuild = true;
  phases = [ "installPhase" ];
  buildInputs = [ jq ];

  # The heredoc delimiter is quoted so the shell copies the JSON text
  # verbatim: with an unquoted delimiter any `$`, backtick or backslash
  # sequence in the serialized configuration would be expanded by the shell
  # and the resulting config.json could be altered or become invalid.
  installPhase = ''
    mkdir -p $out/bin
    ${createLinks}
    ln -s ${firstStageLink} $out/bin/${experimentName}
    cat > $out/config.raw << 'EOF'
    ${builtins.toJSON conf}
    EOF
    jq . $out/config.raw > $out/config.json
    rm $out/config.raw
  '';
}

View File

@ -4,26 +4,26 @@
}:
{
app
program
, outputDir ? "."
, program ? "bin/run"
}:
stdenv.mkDerivation {
name = "${app.name}-statspy";
name = "statspy";
preferLocalBuild = true;
phases = [ "installPhase" ];
programPath = "/bin/${name}";
installPhase = ''
mkdir -p $out/bin
cat > $out/bin/run <<EOF
cat > $out/bin/${name} <<EOF
#!${bash}/bin/bash
mkdir -p ${outputDir}
cat /proc/[0-9]*/stat | sort -n > ${outputDir}/statspy.\$(date +%s.%3N).begin
${app}/${program}
${program}
cat /proc/[0-9]*/stat | sort -n > ${outputDir}/statspy.\$(date +%s.%3N).end
EOF
chmod +x $out/bin/run
chmod +x $out/bin/${name}
'';
}