New config design

This commit is contained in:
Rodrigo Arias 2020-10-09 19:33:06 +02:00
parent 9020f87765
commit 298c7362b3
4 changed files with 158 additions and 352 deletions

View File

@ -1,23 +1,16 @@
{
stdenv
, nixpkgs
, pkgs
, genApp
, genConfigs
, runWrappers
, stdexp
, bsc
, targetMachine
, stages
}:
with stdenv.lib;
let
bsc = pkgs.bsc;
# Set variable configuration for the experiment
varConfig = {
cc = [ bsc.icc ]; # [ bsc.icc pkgs.gfortran10 ];
mpi = [ bsc.impi ]; # [ bsc.impi bsc.openmpi-mn4 ];
# Initial variable configuration
varConf = {
input = [
{ nodes=1 ; nprocz=2 ; granul=37; time= "10:00:00"; }
{ nodes=2 ; nprocz=4 ; granul=19; time= "05:00:00"; }
@ -26,155 +19,65 @@ let
{ nodes=16; nprocz=32; granul=9 ; time= "01:00:00"; }
];
gitBranch = [ "garlic/mpi+isend+oss+task"
"garlic/mpi+send+omp+fork"
"garlic/mpi+send+oss+task"
"garlic/tampi+isend+oss+task"
gitBranch = [
"garlic/mpi+isend+oss+task"
"garlic/mpi+send+omp+fork"
"garlic/mpi+send+oss+task"
"garlic/tampi+isend+oss+task"
];
};
# Common configuration
common = {
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
# Options for creams
cc = icc;
mpi = impi;
inherit (c.input) granul;
inherit (c) gitBranch;
nprocz = 2 * nodes;
# Repeat the execution of each unit 30 times
loops = 30;
# Resources
ntasksPerNode = 2;
#ntasksPerSocket = 1; // Add this variable to nix
# Stage configuration
enableSbatch = true;
enableControl = true;
enableExtrae = false;
enablePerf = false;
enableCtf = false;
# MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix";
qos = "debug";
ntasksPerNode = 2;
inherit (c.input) time nodes;
cpuBind = "socket,verbose";
jobName = "creams-ss-${toString nodes}-${gitBranch}";
};
# Compute the cartesian product of all configurations
configs = map (conf: conf // common) (genConfigs varConfig);
stageProgram = stage:
if stage ? programPath
then "${stage}${stage.programPath}" else "${stage}";
w = runWrappers;
sbatch = {stage, conf, ...}: with conf; w.sbatch {
nodes = input.nodes;
program = stageProgram stage;
exclusive = true;
time = input.time;
#qos = "debug";
jobName = "creams-ss-${toString input.nodes}-${toString gitBranch}";
inherit nixPrefix ntasksPerNode;
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
control = {stage, conf, ...}: with conf; w.control {
program = stageProgram stage;
};
srun = {stage, conf, ...}: with conf; w.srun {
program = stageProgram stage;
srunOptions = "--cpu-bind=verbose,socket";
inherit nixPrefix;
};
statspy = {stage, conf, ...}: with conf; w.statspy {
program = stageProgram stage;
};
perf = {stage, conf, ...}: with conf; w.perf {
program = stageProgram stage;
perfArgs = "sched record -a";
};
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
nixsetup = "${nixPrefix}/bin/nix-setup";
};
extrae = {stage, conf, ...}: w.extrae {
program = stageProgram stage;
traceLib = "mpi"; # mpi -> libtracempi.so
configFile = ./extrae.xml;
};
ctf = {stage, conf, ...}: w.argv {
program = stageProgram stage;
env = ''
export NANOS6=ctf
export NANOS6_CTF2PRV=0
'';
};
bscOverlay = import ../../../overlay.nix;
genPkgs = newOverlay: nixpkgs {
overlays = [
bscOverlay
newOverlay
];
};
inputDataset = {stage, conf, ...}:
let
input = bsc.garlic.creamsInput.override {
gitBranch = conf.gitBranch;
granul = conf.input.granul;
nprocz = conf.input.nprocz;
};
in w.argv
{
program = stageProgram stage;
env = ''
cp -r ${input}/SodTubeBenchmark/* .
chmod +w -R .
'';
};
# We may be able to use overlays by invoking the fix function directly, but we
# have to get the definition of the bsc packages and the garlic ones as
# overlays.
creamsFn = {stage, conf, ...}: with conf;
# Custom stage to copy the creams input dataset
copyInput = {nextStage, conf, ...}:
let
# We set the mpi implementation to the one specified in the conf, so all
# packages in bsc will use that one.
customPkgs = genPkgs (self: super: {
bsc = super.bsc // { mpi = conf.mpi; };
});
input = bsc.garlic.apps.creamsInput.override {
inherit (conf) gitBranch granul nprocz;
};
in
customPkgs.bsc.garlic.creams.override {
inherit cc mpi gitBranch;
};
stages.exec {
inherit nextStage;
env = ''
cp -r ${input}/SodTubeBenchmark/* .
chmod +w -R .
'';
};
stages = with common; []
# Use sbatch to request resources first
++ optional enableSbatch sbatch
# Creams program
creams = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.creams.override {
inherit cc mpi gitBranch;
};
# Repeats the next stages N times
++ optionals enableControl [ nixsetup control ]
# Executes srun to launch the program in the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace.
++ [ srun nixsetup ]
# Instrumentation with extrae
++ optional enableExtrae extrae
# Optionally profile the next stages with perf
++ optional enablePerf perf
# Optionally profile nanos6 with the new ctf
++ optional enableCtf ctf
# Execute the app with the argv and env vars
++ [ inputDataset creamsFn ];
# List of actual programs to be executed
jobs = map (conf: w.stagen { inherit conf stages; }) configs;
pipeline = stdexp.stdPipeline ++ [ copyInput creams ];
in
# We simply run each program one after another
w.launch jobs
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -1,176 +1,76 @@
{
stdenv
, nixpkgs
, pkgs
, genApp
, genConfigs
, runWrappers
, stdexp
, bsc
, targetMachine
, stages
}:
with stdenv.lib;
let
bsc = pkgs.bsc;
# Set variable configuration for the experiment
varConfig = {
cc = [ bsc.icc ]; # [ bsc.icc pkgs.gfortran10 ];
mpi = [ bsc.impi ]; # [ bsc.impi bsc.openmpi-mn4 ];
# Initial variable configuration
varConf = {
input = [
{ nodes=1 ; nprocz=48 ; granul=0; time= "10:00:00"; }
{ nodes=2 ; nprocz=96 ; granul=0; time= "05:00:00"; }
{ nodes=4 ; nprocz=192; granul=0; time= "03:00:00"; }
{ nodes=8 ; nprocz=384; granul=0; time= "02:00:00"; }
{ nodes=16; nprocz=768; granul=0; time= "01:00:00"; }
{ time="10:00:00"; nodes=1; }
{ time="05:00:00"; nodes=2; }
{ time="03:00:00"; nodes=4; }
{ time="02:00:00"; nodes=8; }
{ time="01:00:00"; nodes=16; }
];
gitBranch = [ "garlic/mpi+send+seq" ];
};
# Common configuration
common = {
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
# Options for creams
cc = icc;
mpi = impi;
granul = 0;
gitBranch = "garlic/mpi+send+seq";
nprocz = 48 * nodes;
# Repeat the execution of each unit 30 times
loops = 30;
# Resources
ntasksPerNode = 48;
#ntasksPerSocket = 24; // Add this variable to nix
# Stage configuration
enableSbatch = true;
enableControl = true;
enableExtrae = false;
enablePerf = false;
enableCtf = false;
# MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix";
qos = "debug";
ntasksPerNode = 48;
inherit (c.input) time nodes;
cpuBind = "rank,verbose";
jobName = "creams-ss-${toString nodes}-${gitBranch}";
};
# Compute the cartesian product of all configurations
configs = map (conf: conf // common) (genConfigs varConfig);
stageProgram = stage:
if stage ? programPath
then "${stage}${stage.programPath}" else "${stage}";
w = runWrappers;
sbatch = {stage, conf, ...}: with conf; w.sbatch {
nodes = input.nodes;
program = stageProgram stage;
exclusive = true;
time = input.time;
#qos = "debug";
jobName = "creams-ss-${toString input.nodes}-${toString gitBranch}";
inherit nixPrefix ntasksPerNode;
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
control = {stage, conf, ...}: with conf; w.control {
program = stageProgram stage;
};
srun = {stage, conf, ...}: with conf; w.srun {
program = stageProgram stage;
srunOptions = "--cpu-bind=verbose,rank";
inherit nixPrefix;
};
statspy = {stage, conf, ...}: with conf; w.statspy {
program = stageProgram stage;
};
perf = {stage, conf, ...}: with conf; w.perf {
program = stageProgram stage;
perfArgs = "sched record -a";
};
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
nixsetup = "${nixPrefix}/bin/nix-setup";
};
extrae = {stage, conf, ...}: w.extrae {
program = stageProgram stage;
traceLib = "mpi"; # mpi -> libtracempi.so
configFile = ./extrae.xml;
};
ctf = {stage, conf, ...}: w.argv {
program = stageProgram stage;
env = ''
export NANOS6=ctf
export NANOS6_CTF2PRV=0
'';
};
bscOverlay = import ../../../overlay.nix;
genPkgs = newOverlay: nixpkgs {
overlays = [
bscOverlay
newOverlay
];
};
inputDataset = {stage, conf, ...}:
let
input = bsc.garlic.creamsInput.override {
gitBranch = conf.gitBranch;
granul = conf.input.granul;
nprocz = conf.input.nprocz;
};
in w.argv
{
program = stageProgram stage;
env = ''
cp -r ${input}/SodTubeBenchmark/* .
chmod +w -R .
'';
};
# We may be able to use overlays by invoking the fix function directly, but we
# have to get the definition of the bsc packages and the garlic ones as
# overlays.
creamsFn = {stage, conf, ...}: with conf;
# Custom stage to copy the creams input dataset
copyInput = {nextStage, conf, ...}:
let
# We set the mpi implementation to the one specified in the conf, so all
# packages in bsc will use that one.
customPkgs = genPkgs (self: super: {
bsc = super.bsc // { mpi = conf.mpi; };
});
input = bsc.garlic.apps.creamsInput.override {
inherit (conf) gitBranch granul nprocz;
};
in
customPkgs.bsc.garlic.creams.override {
inherit cc mpi gitBranch;
};
stages.exec {
inherit nextStage;
env = ''
cp -r ${input}/SodTubeBenchmark/* .
chmod +w -R .
'';
};
stages = with common; []
# Use sbatch to request resources first
++ optional enableSbatch sbatch
# Creams program
creams = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.creams.override {
inherit cc mpi gitBranch;
};
# Repeats the next stages N times
++ optionals enableControl [ nixsetup control ]
# Executes srun to launch the program in the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace.
++ [ srun nixsetup ]
# Instrumentation with extrae
++ optional enableExtrae extrae
# Optionally profile the next stages with perf
++ optional enablePerf perf
# Optionally profile nanos6 with the new ctf
++ optional enableCtf ctf
# Execute the app with the argv and env vars
++ [ inputDataset creamsFn ];
# List of actual programs to be executed
jobs = map (conf: w.stagen { inherit conf stages; }) configs;
pipeline = stdexp.stdPipeline ++ [ copyInput creams ];
in
# We simply run each program one after another
w.launch jobs
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -9,16 +9,17 @@
with stdenv.lib;
let
# Configurations for each unit (using the cartesian product)
confUnit = with bsc; {
# Initial variable configuration
varConf = with bsc; {
blocksize = [ 1024 2048 ];
};
# Configuration for the complete experiment
confExperiment = with bsc; {
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
# nbody options
particles = 1024 * 4;
timesteps = 10;
inherit (c) blocksize;
cc = icc;
mpi = impi;
gitBranch = "garlic/mpi+send";
@ -27,15 +28,16 @@ let
loops = 30;
# Resources
qos = "debug";
ntasksPerNode = 2;
nodes = 1;
cpuBind = "sockets,verbose";
jobName = "nbody-bs-${toString blocksize}-${gitBranch}";
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
var = confUnit;
fixed = targetMachine.config // confExperiment;
inherit varConf genConf;
};
exec = {nextStage, conf, ...}: with conf; stages.exec {
@ -51,7 +53,8 @@ let
inherit cc blocksize mpi gitBranch;
};
pipeline = stdexp.stdStages ++ [ exec program ];
pipeline = stdexp.stdUnitPre {sbatch=mySbatch;}
++ [ exec program ];
in

View File

@ -25,52 +25,52 @@ rec {
};
};
/* Given an attrset of lists `var` and an attrset `fixed`, computes the
cartesian product of all combinations of `var` and prepends `fixed`
to each. */
buildConfigs = {fixed, var}:
map (c: fixed // c) (genConfigs var);
/* Given an attrset of lists `varConf` and a function `genConf` that accepts an
attrset, computes the cartesian product of all combinations of `varConf` and
calls `genConf` on each to produce the final list of configurations. */
buildConfigs = {varConf, genConf}:
map (c: genConf c) (genConfigs varConf);
sbatch = {nextStage, conf, ...}: with conf; stages.sbatch (
# Allow a user to define a custom reservation for the job in MareNostrum4,
# by setting the garlic.sbatch.reservation attribute in the
# ~/.config/nixpkgs/config.nix file. If the attribute is not set, no
# reservation is used. The user reservation may be overwritten by the
# experiment, if the reservation is set like with nodes or ntasksPerNode.
optionalAttrs (config ? garlic.sbatch.reservation) {
inherit (config.garlic.sbatch) reservation;
} // {
exclusive = true;
time = "02:00:00";
qos = "debug";
jobName = "nbody-tampi";
inherit nextStage nixPrefix nodes ntasksPerNode;
}
);
stdStages = {
sbatch = {nextStage, conf, ...}: with conf; stages.sbatch (
# Allow a user to define a custom reservation for the job in MareNostrum4,
# by setting the garlic.sbatch.reservation attribute in the
# ~/.config/nixpkgs/config.nix file. If the attribute is not set, no
# reservation is used. The user reservation may be overwritten by the
# experiment, if the reservation is set like with nodes or ntasksPerNode.
optionalAttrs (config ? garlic.sbatch.reservation) {
inherit (config.garlic.sbatch) reservation;
} // {
exclusive = true;
inherit nextStage nixPrefix nodes ntasksPerNode time qos jobName;
}
);
control = {nextStage, conf, ...}: stages.control {
inherit (conf) loops;
inherit nextStage;
control = {nextStage, conf, ...}: stages.control {
inherit (conf) loops;
inherit nextStage;
};
srun = {nextStage, conf, ...}: stages.srun {
inherit (conf) nixPrefix cpuBind;
inherit nextStage;
};
isolate = {nextStage, conf, ...}: stages.isolate {
clusterName = machineConf.name;
inherit (conf) nixPrefix;
inherit nextStage;
};
};
srun = {nextStage, conf, ...}: stages.srun {
inherit (conf) nixPrefix cpuBind;
inherit nextStage;
};
stdPipelineOverride = {overrides ? {}}:
let
stages = stdStages // overrides;
in
with stages; [ sbatch isolate control srun isolate ];
isolate = {nextStage, conf, ...}: stages.isolate {
clusterName = machineConf.name;
inherit (conf) nixPrefix;
inherit nextStage;
};
stdStages = [
sbatch
isolate
control
srun
isolate
];
stdPipeline = stdPipelineOverride {};
# FIXME: Remove this hack and allow custom nixpkgs
bscOverlay = import ../overlay.nix;