New config design

Rodrigo Arias 2020-10-09 19:33:06 +02:00
parent 9020f87765
commit 298c7362b3
4 changed files with 158 additions and 352 deletions

View File

@@ -1,23 +1,16 @@
 {
   stdenv
-, nixpkgs
-, pkgs
-, genApp
-, genConfigs
-, runWrappers
+, stdexp
+, bsc
+, targetMachine
+, stages
 }:

 with stdenv.lib;

 let
-  bsc = pkgs.bsc;
-
-  # Set variable configuration for the experiment
-  varConfig = {
-    cc = [ bsc.icc ]; # [ bsc.icc pkgs.gfortran10 ];
-    mpi = [ bsc.impi ]; # [ bsc.impi bsc.openmpi-mn4 ];
+  # Initial variable configuration
+  varConf = {
     input = [
       { nodes=1 ; nprocz=2 ; granul=37; time= "10:00:00"; }
       { nodes=2 ; nprocz=4 ; granul=19; time= "05:00:00"; }
@@ -26,155 +19,65 @@ let
       { nodes=16; nprocz=32; granul=9 ; time= "01:00:00"; }
     ];
-    gitBranch = [ "garlic/mpi+isend+oss+task"
-      "garlic/mpi+send+omp+fork"
-      "garlic/mpi+send+oss+task"
-      "garlic/tampi+isend+oss+task"
+    gitBranch = [
+      "garlic/mpi+isend+oss+task"
+      "garlic/mpi+send+omp+fork"
+      "garlic/mpi+send+oss+task"
+      "garlic/tampi+isend+oss+task"
     ];
   };

-  # Common configuration
-  common = {
+  # Generate the complete configuration for each unit
+  genConf = with bsc; c: targetMachine.config // rec {
+    # Options for creams
+    cc = icc;
+    mpi = impi;
+    inherit (c.input) granul;
+    inherit (c) gitBranch;
+    nprocz = 2 * nodes;
+
+    # Repeat the execution of each unit 30 times
+    loops = 30;
+
     # Resources
+    qos = "debug";
     ntasksPerNode = 2;
-    #ntasksPerSocket = 1; // Add this variable to nix
-
-    # Stage configuration
-    enableSbatch = true;
-    enableControl = true;
-    enableExtrae = false;
-    enablePerf = false;
-    enableCtf = false;
-
-    # MN4 path
-    nixPrefix = "/gpfs/projects/bsc15/nix";
+    inherit (c.input) time nodes;
+    cpuBind = "socket,verbose";
+    jobName = "creams-ss-${toString nodes}-${gitBranch}";
   };

-  # Compute the cartesian product of all configurations
-  configs = map (conf: conf // common) (genConfigs varConfig);
-
-  stageProgram = stage:
-    if stage ? programPath
-    then "${stage}${stage.programPath}" else "${stage}";
-
-  w = runWrappers;
-
-  sbatch = {stage, conf, ...}: with conf; w.sbatch {
-    nodes = input.nodes;
-    program = stageProgram stage;
-    exclusive = true;
-    time = input.time;
-    #qos = "debug";
-    jobName = "creams-ss-${toString input.nodes}-${toString gitBranch}";
-    inherit nixPrefix ntasksPerNode;
-  };
-
-  control = {stage, conf, ...}: with conf; w.control {
-    program = stageProgram stage;
-  };
-
-  srun = {stage, conf, ...}: with conf; w.srun {
-    program = stageProgram stage;
-    srunOptions = "--cpu-bind=verbose,socket";
-    inherit nixPrefix;
-  };
-
-  statspy = {stage, conf, ...}: with conf; w.statspy {
-    program = stageProgram stage;
-  };
-
-  perf = {stage, conf, ...}: with conf; w.perf {
-    program = stageProgram stage;
-    perfArgs = "sched record -a";
-  };
-
-  nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
-    program = stageProgram stage;
-    nixsetup = "${nixPrefix}/bin/nix-setup";
-  };
-
-  extrae = {stage, conf, ...}: w.extrae {
-    program = stageProgram stage;
-    traceLib = "mpi"; # mpi -> libtracempi.so
-    configFile = ./extrae.xml;
-  };
-
-  ctf = {stage, conf, ...}: w.argv {
-    program = stageProgram stage;
-    env = ''
-      export NANOS6=ctf
-      export NANOS6_CTF2PRV=0
-    '';
-  };
-
-  bscOverlay = import ../../../overlay.nix;
-
-  genPkgs = newOverlay: nixpkgs {
-    overlays = [
-      bscOverlay
-      newOverlay
-    ];
-  };
-
-  inputDataset = {stage, conf, ...}:
-    let
-      input = bsc.garlic.creamsInput.override {
-        gitBranch = conf.gitBranch;
-        granul = conf.input.granul;
-        nprocz = conf.input.nprocz;
-      };
-    in w.argv
-    {
-      program = stageProgram stage;
-      env = ''
-        cp -r ${input}/SodTubeBenchmark/* .
-        chmod +w -R .
-      '';
-    };
-
-  # We may be able to use overlays by invoking the fix function directly, but we
-  # have to get the definition of the bsc packages and the garlic ones as
-  # overlays.
-
-  creamsFn = {stage, conf, ...}: with conf;
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  # Custom stage to copy the creams input dataset
+  copyInput = {nextStage, conf, ...}:
     let
-      # We set the mpi implementation to the one specified in the conf, so all
-      # packages in bsc will use that one.
-      customPkgs = genPkgs (self: super: {
-        bsc = super.bsc // { mpi = conf.mpi; };
-      });
+      input = bsc.garlic.apps.creamsInput.override {
+        inherit (conf) gitBranch granul nprocz;
+      };
     in
-      customPkgs.bsc.garlic.creams.override {
-        inherit cc mpi gitBranch;
-      };
-
-  stages = with common; []
-    # Use sbatch to request resources first
-    ++ optional enableSbatch sbatch
-
-    # Repeats the next stages N times
-    ++ optionals enableControl [ nixsetup control ]
-
-    # Executes srun to launch the program in the requested nodes, and
-    # immediately after enters the nix environment again, as slurmstepd launches
-    # the next stages from outside the namespace.
-    ++ [ srun nixsetup ]
-
-    # Intrumentation with extrae
-    ++ optional enableExtrae extrae
-
-    # Optionally profile the next stages with perf
-    ++ optional enablePerf perf
-
-    # Optionally profile nanos6 with the new ctf
-    ++ optional enableCtf ctf
-
-    # Execute the app with the argv and env vars
-    ++ [ inputDataset creamsFn ];
-
-  # List of actual programs to be executed
-  jobs = map (conf: w.stagen { inherit conf stages; }) configs;
+      stages.exec {
+        inherit nextStage;
+        env = ''
+          cp -r ${input}/SodTubeBenchmark/* .
+          chmod +w -R .
+        '';
+      };
+
+  # Creams program
+  creams = {nextStage, conf, ...}: with conf;
+    let
+      customPkgs = stdexp.replaceMpi conf.mpi;
+    in
+      customPkgs.apps.creams.override {
+        inherit cc mpi gitBranch;
+      };
+
+  pipeline = stdexp.stdPipeline ++ [ copyInput creams ];

 in
-  # We simply run each program one after another
-  w.launch jobs
+  stdexp.genExperiment { inherit configs pipeline; }
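
Note: the new design reduces each experiment to a varConf of lists plus a genConf function, and stdexp.buildConfigs maps the cartesian product of varConf through genConf. A minimal sketch of those mechanics, with an inline stand-in for the genConfigs helper the old files imported (the stand-in and the sample values are illustrative, not the actual library code):

with import <nixpkgs> {};

let
  # Stand-in for the genConfigs helper: the cartesian product of an
  # attrset of lists, e.g.
  #   { a = [ 1 2 ]; b = [ 3 ]; }  ->  [ { a=1; b=3; } { a=2; b=3; } ]
  genConfigs = attrs:
    lib.foldl'
      (confs: name:
        lib.concatMap
          (conf: map (v: conf // { ${name} = v; }) attrs.${name})
          confs)
      [ { } ]
      (lib.attrNames attrs);

  # Same shape as stdexp.buildConfigs in the diff above.
  buildConfigs = { varConf, genConf }:
    map genConf (genConfigs varConf);

  varConf = {
    blocksize = [ 1024 2048 ];
    nodes = [ 1 2 ];
  };

  genConf = c: rec {
    inherit (c) blocksize nodes;
    jobName = "test-${toString blocksize}-${toString nodes}";
  };
in
  buildConfigs { inherit varConf genConf; }

Evaluating this with nix-instantiate --eval --strict yields four complete configurations, one per blocksize/nodes combination.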

View File

@@ -1,176 +1,76 @@
 {
   stdenv
-, nixpkgs
-, pkgs
-, genApp
-, genConfigs
-, runWrappers
+, stdexp
+, bsc
+, targetMachine
+, stages
 }:

 with stdenv.lib;

 let
-  bsc = pkgs.bsc;
-
-  # Set variable configuration for the experiment
-  varConfig = {
-    cc = [ bsc.icc ]; # [ bsc.icc pkgs.gfortran10 ];
-    mpi = [ bsc.impi ]; # [ bsc.impi bsc.openmpi-mn4 ];
+  # Initial variable configuration
+  varConf = {
     input = [
-      { nodes=1 ; nprocz=48 ; granul=0; time= "10:00:00"; }
-      { nodes=2 ; nprocz=96 ; granul=0; time= "05:00:00"; }
-      { nodes=4 ; nprocz=192; granul=0; time= "03:00:00"; }
-      { nodes=8 ; nprocz=384; granul=0; time= "02:00:00"; }
-      { nodes=16; nprocz=768; granul=0; time= "01:00:00"; }
+      { time="10:00:00"; nodes=1; }
+      { time="05:00:00"; nodes=2; }
+      { time="03:00:00"; nodes=4; }
+      { time="02:00:00"; nodes=8; }
+      { time="01:00:00"; nodes=16; }
     ];
-    gitBranch = [ "garlic/mpi+send+seq" ];
   };

-  # Common configuration
-  common = {
+  # Generate the complete configuration for each unit
+  genConf = with bsc; c: targetMachine.config // rec {
+    # Options for creams
+    cc = icc;
+    mpi = impi;
+    granul = 0;
+    gitBranch = "garlic/mpi+send+seq";
+    nprocz = 48 * nodes;
+
+    # Repeat the execution of each unit 30 times
+    loops = 30;
+
     # Resources
+    qos = "debug";
     ntasksPerNode = 48;
-    #ntasksPerSocket = 24; // Add this variable to nix
-
-    # Stage configuration
-    enableSbatch = true;
-    enableControl = true;
-    enableExtrae = false;
-    enablePerf = false;
-    enableCtf = false;
-
-    # MN4 path
-    nixPrefix = "/gpfs/projects/bsc15/nix";
+    inherit (c.input) time nodes;
+    cpuBind = "rank,verbose";
+    jobName = "creams-ss-${toString nodes}-${gitBranch}";
   };

-  # Compute the cartesian product of all configurations
-  configs = map (conf: conf // common) (genConfigs varConfig);
-
-  stageProgram = stage:
-    if stage ? programPath
-    then "${stage}${stage.programPath}" else "${stage}";
-
-  w = runWrappers;
-
-  sbatch = {stage, conf, ...}: with conf; w.sbatch {
-    nodes = input.nodes;
-    program = stageProgram stage;
-    exclusive = true;
-    time = input.time;
-    #qos = "debug";
-    jobName = "creams-ss-${toString input.nodes}-${toString gitBranch}";
-    inherit nixPrefix ntasksPerNode;
-  };
-
-  control = {stage, conf, ...}: with conf; w.control {
-    program = stageProgram stage;
-  };
-
-  srun = {stage, conf, ...}: with conf; w.srun {
-    program = stageProgram stage;
-    srunOptions = "--cpu-bind=verbose,rank";
-    inherit nixPrefix;
-  };
-
-  statspy = {stage, conf, ...}: with conf; w.statspy {
-    program = stageProgram stage;
-  };
-
-  perf = {stage, conf, ...}: with conf; w.perf {
-    program = stageProgram stage;
-    perfArgs = "sched record -a";
-  };
-
-  nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
-    program = stageProgram stage;
-    nixsetup = "${nixPrefix}/bin/nix-setup";
-  };
-
-  extrae = {stage, conf, ...}: w.extrae {
-    program = stageProgram stage;
-    traceLib = "mpi"; # mpi -> libtracempi.so
-    configFile = ./extrae.xml;
-  };
-
-  ctf = {stage, conf, ...}: w.argv {
-    program = stageProgram stage;
-    env = ''
-      export NANOS6=ctf
-      export NANOS6_CTF2PRV=0
-    '';
-  };
-
-  bscOverlay = import ../../../overlay.nix;
-
-  genPkgs = newOverlay: nixpkgs {
-    overlays = [
-      bscOverlay
-      newOverlay
-    ];
-  };
-
-  inputDataset = {stage, conf, ...}:
-    let
-      input = bsc.garlic.creamsInput.override {
-        gitBranch = conf.gitBranch;
-        granul = conf.input.granul;
-        nprocz = conf.input.nprocz;
-      };
-    in w.argv
-    {
-      program = stageProgram stage;
-      env = ''
-        cp -r ${input}/SodTubeBenchmark/* .
-        chmod +w -R .
-      '';
-    };
-
-  # We may be able to use overlays by invoking the fix function directly, but we
-  # have to get the definition of the bsc packages and the garlic ones as
-  # overlays.
-
-  creamsFn = {stage, conf, ...}: with conf;
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  # Custom stage to copy the creams input dataset
+  copyInput = {nextStage, conf, ...}:
     let
-      # We set the mpi implementation to the one specified in the conf, so all
-      # packages in bsc will use that one.
-      customPkgs = genPkgs (self: super: {
-        bsc = super.bsc // { mpi = conf.mpi; };
-      });
+      input = bsc.garlic.apps.creamsInput.override {
+        inherit (conf) gitBranch granul nprocz;
+      };
     in
-      customPkgs.bsc.garlic.creams.override {
-        inherit cc mpi gitBranch;
-      };
-
-  stages = with common; []
-    # Use sbatch to request resources first
-    ++ optional enableSbatch sbatch
-
-    # Repeats the next stages N times
-    ++ optionals enableControl [ nixsetup control ]
-
-    # Executes srun to launch the program in the requested nodes, and
-    # immediately after enters the nix environment again, as slurmstepd launches
-    # the next stages from outside the namespace.
-    ++ [ srun nixsetup ]
-
-    # Intrumentation with extrae
-    ++ optional enableExtrae extrae
-
-    # Optionally profile the next stages with perf
-    ++ optional enablePerf perf
-
-    # Optionally profile nanos6 with the new ctf
-    ++ optional enableCtf ctf
-
-    # Execute the app with the argv and env vars
-    ++ [ inputDataset creamsFn ];
-
-  # List of actual programs to be executed
-  jobs = map (conf: w.stagen { inherit conf stages; }) configs;
+      stages.exec {
+        inherit nextStage;
+        env = ''
+          cp -r ${input}/SodTubeBenchmark/* .
+          chmod +w -R .
+        '';
+      };
+
+  # Creams program
+  creams = {nextStage, conf, ...}: with conf;
+    let
+      customPkgs = stdexp.replaceMpi conf.mpi;
+    in
+      customPkgs.apps.creams.override {
+        inherit cc mpi gitBranch;
+      };
+
+  pipeline = stdexp.stdPipeline ++ [ copyInput creams ];

 in
-  # We simply run each program one after another
-  w.launch jobs
+  stdexp.genExperiment { inherit configs pipeline; }
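
Note: both creams experiments now call stdexp.replaceMpi instead of rebuilding the package set by hand with genPkgs. Based on the deleted block above, replaceMpi plausibly wraps the same overlay trick; the body below is an assumption for illustration, not the actual stdexp source:

# Assumed shape of stdexp.replaceMpi, reconstructed from the deleted
# genPkgs pattern (nixpkgs and bscOverlay as in the old files).
replaceMpi = mpi:
  let
    customPkgs = nixpkgs {
      overlays = [
        bscOverlay
        # Every package in the bsc set now links against the chosen
        # MPI implementation (e.g. bsc.impi or bsc.openmpi-mn4).
        (self: super: { bsc = super.bsc // { inherit mpi; }; })
      ];
    };
  in
    # Returning the garlic subtree matches the call sites above, which
    # use customPkgs.apps.creams.override.
    customPkgs.bsc.garlic;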

View File

@@ -9,16 +9,17 @@
 with stdenv.lib;

 let
-  # Configurations for each unit (using the cartesian product)
-  confUnit = with bsc; {
+  # Initial variable configuration
+  varConf = with bsc; {
     blocksize = [ 1024 2048 ];
   };

-  # Configuration for the complete experiment
-  confExperiment = with bsc; {
+  # Generate the complete configuration for each unit
+  genConf = with bsc; c: targetMachine.config // rec {
     # nbody options
     particles = 1024 * 4;
     timesteps = 10;
+    inherit (c) blocksize;
     cc = icc;
     mpi = impi;
     gitBranch = "garlic/mpi+send";
@@ -27,15 +28,16 @@ let
     loops = 30;

     # Resources
+    qos = "debug";
     ntasksPerNode = 2;
     nodes = 1;
     cpuBind = "sockets,verbose";
+    jobName = "nbody-bs-${toString blocksize}-${gitBranch}";
   };

   # Compute the array of configurations
   configs = stdexp.buildConfigs {
-    var = confUnit;
-    fixed = targetMachine.config // confExperiment;
+    inherit varConf genConf;
   };

   exec = {nextStage, conf, ...}: with conf; stages.exec {
@@ -51,7 +53,8 @@ let
     inherit cc blocksize mpi gitBranch;
   };

-  pipeline = stdexp.stdStages ++ [ exec program ];
+  pipeline = stdexp.stdUnitPre {sbatch=mySbatch;}
+    ++ [ exec program ];

 in
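
Note: concretely, each element of varConf flows through genConf to a complete unit configuration. A hypothetical rendering of genConf { blocksize = 1024; } under the definitions above, assuming bsc is in scope and that targetMachine.config contributes the machine-dependent attributes such as nixPrefix:

# Hypothetical result (values abridged; nixPrefix is machine-dependent):
{
  nixPrefix = "/gpfs/projects/bsc15/nix";
  particles = 4096;   # 1024 * 4
  timesteps = 10;
  blocksize = 1024;
  cc = bsc.icc;
  mpi = bsc.impi;
  gitBranch = "garlic/mpi+send";
  loops = 30;
  qos = "debug";
  ntasksPerNode = 2;
  nodes = 1;
  cpuBind = "sockets,verbose";
  jobName = "nbody-bs-1024-garlic/mpi+send";
}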

View File

@@ -25,52 +25,52 @@ rec {
     };
   };

-  /* Given an attrset of lists `var` and an attrset `fixed`, computes the
-     cartesian product of all combinations of `var` and prepends `fixed`
-     to each. */
-  buildConfigs = {fixed, var}:
-    map (c: fixed // c) (genConfigs var);
+  /* Given an attrset of lists `varConf` and a function `genConf` that accepts
+     an attrset, computes the cartesian product of all combinations of
+     `varConf` and calls genConf on each to produce the final configurations. */
+  buildConfigs = {varConf, genConf}:
+    map (c: genConf c) (genConfigs varConf);

-  sbatch = {nextStage, conf, ...}: with conf; stages.sbatch (
-    # Allow a user to define a custom reservation for the job in MareNostrum4,
-    # by setting the garlic.sbatch.reservation attribute in the
-    # ~/.config/nixpkgs/config.nix file. If the attribute is not set, no
-    # reservation is used. The user reservation may be overwritten by the
-    # experiment, if the reservation is set like with nodes or ntasksPerNode.
-    optionalAttrs (config ? garlic.sbatch.reservation) {
-      inherit (config.garlic.sbatch) reservation;
-    } // {
-      exclusive = true;
-      time = "02:00:00";
-      qos = "debug";
-      jobName = "nbody-tampi";
-      inherit nextStage nixPrefix nodes ntasksPerNode;
-    }
-  );
+  stdStages = {
+    sbatch = {nextStage, conf, ...}: with conf; stages.sbatch (
+      # Allow a user to define a custom reservation for the job in MareNostrum4,
+      # by setting the garlic.sbatch.reservation attribute in the
+      # ~/.config/nixpkgs/config.nix file. If the attribute is not set, no
+      # reservation is used. The user reservation may be overwritten by the
+      # experiment, if the reservation is set like with nodes or ntasksPerNode.
+      optionalAttrs (config ? garlic.sbatch.reservation) {
+        inherit (config.garlic.sbatch) reservation;
+      } // {
+        exclusive = true;
+        inherit nextStage nixPrefix nodes ntasksPerNode time qos jobName;
+      }
+    );

-  control = {nextStage, conf, ...}: stages.control {
-    inherit (conf) loops;
-    inherit nextStage;
-  };
+    control = {nextStage, conf, ...}: stages.control {
+      inherit (conf) loops;
+      inherit nextStage;
+    };

-  srun = {nextStage, conf, ...}: stages.srun {
-    inherit (conf) nixPrefix cpuBind;
-    inherit nextStage;
-  };
+    srun = {nextStage, conf, ...}: stages.srun {
+      inherit (conf) nixPrefix cpuBind;
+      inherit nextStage;
+    };

-  isolate = {nextStage, conf, ...}: stages.isolate {
-    clusterName = machineConf.name;
-    inherit (conf) nixPrefix;
-    inherit nextStage;
-  };
+    isolate = {nextStage, conf, ...}: stages.isolate {
+      clusterName = machineConf.name;
+      inherit (conf) nixPrefix;
+      inherit nextStage;
+    };
+  };

-  stdStages = [
-    sbatch
-    isolate
-    control
-    srun
-    isolate
-  ];
+  stdPipelineOverride = {overrides ? {}}:
+    let
+      stages = stdStages // overrides;
+    in
+      with stages; [ sbatch isolate control srun isolate ];
+
+  stdPipeline = stdPipelineOverride {};

   # FIXME: Remove this hack and allow custom nixpkgs
   bscOverlay = import ../overlay.nix;
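
Note: stdPipelineOverride keeps the standard [ sbatch isolate control srun isolate ] order while letting one experiment swap a single stage, which is what the nbody file does with its custom sbatch (via a similar hook, stdUnitPre). A usage sketch against the definitions above; the override body here is hypothetical:

# A pipeline identical to stdPipeline except for the sbatch stage.
customPipeline = stdexp.stdPipelineOverride {
  overrides = {
    sbatch = {nextStage, conf, ...}: with conf; stages.sbatch {
      exclusive = false;  # e.g. relax the default node exclusivity
      inherit nextStage nixPrefix nodes ntasksPerNode time qos jobName;
    };
  };
};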