hpcg: refactor ss and gen using a common file

- The file gen.nix now provides an experiment for each unit, to reduce
  the evaluation time.

- The pipeline is specified in the common.nix file only.

- The input dataset path is no longer symlinked, but is specified in the
  "--load" argument.

- The size option is renamed from "n" to "sizePerTask".
This commit is contained in:
Rodrigo Arias 2021-04-16 11:51:34 +02:00
parent 9bb570af7f
commit b4e37a15a9
5 changed files with 165 additions and 187 deletions

View File

@ -0,0 +1,72 @@
{
stdenv
, stdexp
, bsc
, stages
, callPackage
}:
with stdenv.lib;
rec {
# Stage that verifies the input dataset before the program is launched.
# When the unit does not generate the dataset itself (conf.enableGen is unset
# or false), the stage prologue exports HPCG_INPUT_PATH and exits with status
# 1 if that path does not exist. For generation units the prologue is empty.
checkInput = {nextStage, conf, ...}:
  let
    isGenUnit = conf.enableGen or false;

    # Experiment that produces the dataset for this configuration
    # (getInputTre is provided by gen.nix).
    genTre = (callPackage ./gen.nix { }).getInputTre conf;
    genExp = genTre.experiment;
    genUnit = elemAt genExp.units 0;

    # The "1" subdirectory of that unit under $GARLIC_OUT (presumably the
    # first run -- confirm). $GARLIC_OUT is left for the shell to expand.
    datasetDir =
      "$GARLIC_OUT/${baseNameOf (toString genExp)}/${baseNameOf (toString genUnit)}/1";

    prologue = ''
      # Force the generation of the input resultTree as a dependency:
      # ${toString genTre.result}
      # Ensure the input dataset is still available
      export HPCG_INPUT_PATH="${datasetDir}"
      if [ ! -e "$HPCG_INPUT_PATH" ]; then
      >&2 echo "Missing input dataset: $HPCG_INPUT_PATH"
      exit 1
      fi
    '';
  in stages.exec {
    inherit nextStage;
    pre = if isGenUnit then "" else prologue;
  };
# Multiply every value of the sizePerCpu attribute set by cpusPerTask and
# return the resulting attribute set (same attribute names).
getSizePerTask = cpusPerTask: sizePerCpu:
  let
    scale = _axis: perCpu: perCpu * cpusPerTask;
  in mapAttrs scale sizePerCpu;
# Stage that builds the HPCG command line from the unit configuration.
exec = {nextStage, conf, ...}:
  let
    # Render one "--name=value" option.
    opt = name: value: "--${name}=${toString value}";

    generating = conf.enableGen or false;
    noAspectRatio = conf.disableAspectRatio or false;

    # Generation units store the dataset in the current directory; all other
    # units load it from HPCG_INPUT_PATH, which the checkInput stage exports.
    datasetArg =
      if generating
      then "--store=."
      else "--load=\"$HPCG_INPUT_PATH\"";
  in stages.exec {
    inherit nextStage;
    argv = [
      (opt "nx" conf.sizePerTask.x)
      (opt "ny" conf.sizePerTask.y)
      (opt "nz" conf.sizePerTask.z)
      (opt "npx" conf.nprocs.x)
      (opt "npy" conf.nprocs.y)
      (opt "npz" conf.nprocs.z)
      (opt "nblocks" conf.nblocks)
      (opt "ncomms" conf.ncomms)
      datasetArg
    ] ++ (if noAspectRatio then [ "--no-ar=1" ] else [ ]);
  };
# Last stage: the bsc.apps.hpcg package overridden with the unit's gitBranch.
program = {nextStage, conf, ...}:
  bsc.apps.hpcg.override {
    gitBranch = conf.gitBranch;
  };
# Complete pipeline: the standard stages followed by the HPCG-specific ones,
# in the order input check, argument setup, program.
pipeline =
  let
    hpcgStages = [ checkInput exec program ];
  in stdexp.stdPipeline ++ hpcgStages;
}

View File

@ -5,95 +5,45 @@
, targetMachine
, stages
, garlicTools
, callPackage
}:
with stdenv.lib;
with builtins;
with garlicTools;
let
rec {
# Generate the complete configuration for each unit
genConf = c: targetMachine.config // rec {
expName = "${c.expName}.gen";
unitName = "${expName}.n${toString n.x}";
unitName = "${c.unitName}.gen";
inherit (targetMachine.config) hw;
# Only the n and gitBranch options are inherited
inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
# Inherit options from the current conf
inherit (c) sizePerTask nprocs disableAspectRatio gitBranch
cpusPerTask ntasksPerNode nodes;
# Repeat the execution of each unit 30 times
# nblocks and ncomms are ignored from c
ncomms = 1;
nblocks = 1;
# We only need one run
loops = 1;
# Generate the input
enableGen = true;
# Resources
qos = "debug";
# ntasksPerNode = hw.socketsPerNode;
# nodes = 2;
time = "00:30:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
time = "02:00:00";
jobName = unitName;
};
exec = {nextStage, conf, ...}: with conf; stages.exec {
inherit nextStage;
argv = [
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
# nblocks and ncomms are ignored
"--nblocks=1"
"--ncomms=1"
# Store the results in the same directory
"--store=."
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
common = callPackage ./common.nix {};
getInputTre = conf: stdexp.genExperiment {
configs = [ (genConf conf) ];
pipeline = common.pipeline;
};
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ exec program ];
genExp = configs: stdexp.genExperiment { inherit configs pipeline; };
genInputLink = inputConfigs: {nextStage, conf, ...}:
let
# Compute the experiment that produces HPCG input matrix from the
# configuration of this unit:
configs = map genConf inputConfigs;
inputTre = genExp configs;
#inputExp = getExperimentStage inputTrebuchet;
#inputExp = trace inputTrebuchet inputTrebuchet.nextStage;
inputExp = getExperimentStage inputTre;
# Then load the result. This is only used to ensure that we have the
# results, so it has been executed.
inputRes = inputTre.result;
# We also need the unit, to compute the path.
inputUnit = stages.unit {
conf = genConf conf;
stages = pipeline;
};
# Build the path:
expName = baseNameOf (toString inputExp);
unitName = baseNameOf (toString inputUnit);
relPath = "../../${expName}/${unitName}/1";
in stages.exec {
inherit nextStage;
env = ''
# This line ensures that the results of the HPCG generation are complete:
# ${inputRes}
# Then we simply link the input result directory in "input"
# We use || true because all ranks will execute this and
# the execution will fail
ln -sf ${relPath} input || true
'';
};
in
#{ inherit genConf genExp genInputLink; }
genInputLink
}

View File

@ -1,112 +0,0 @@
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Initial variable configuration
varConf =
  let
    # Global problem size; genConf divides it by nprocs on every axis.
    globalSize = { x = 192 / 4; y = 192 / 4; z = 16 * 192; };

    # Process layout with all processes along the Z axis.
    alongZ = tasks: { x = 1; y = 1; z = tasks; };
  in {
    n = [ globalSize ];

    # Disabled alternatives: the same 2..32 process counts laid out along X
    # ({ x = N; y = 1; z = 1; }) or along Y ({ x = 1; y = N; z = 1; }).
    nprocs = map alongZ [ 2 4 8 16 32 ];

    # Disabled alternative: nblocks = [ 12 24 48 96 192 384 768 1536 ];
    nblocks = [ 24 48 96 192 384 ];

    ncommblocks = [ 1 ];

    # nodes is not a variable here (disabled candidates: [ 1 ] and
    # [ 1 2 4 8 16 ]); genConf computes it from nprocs.
  };
# Generate the complete configuration for each unit, on top of the target
# machine configuration.
genConf = c:
  let
    inherit (targetMachine.config) hw;
    # hpcg options taken from the variable configuration
    inherit (c) nprocs nblocks ncommblocks;

    expName = "hpcg.oss";
    gitBranch = "garlic/tampi+isend+oss+task";

    # Size per process: the global size split along each axis.
    n = {
      x = c.n.x / nprocs.x;
      y = c.n.y / nprocs.y;
      z = c.n.z / nprocs.z;
    };

    # One task per socket
    ntasksPerNode = hw.socketsPerNode;
    totalProcs = nprocs.x * nprocs.y * nprocs.z;
  in targetMachine.config // {
    inherit expName hw nprocs nblocks ncommblocks n gitBranch ntasksPerNode;
    unitName = "${expName}.nb${toString nblocks}";

    # Repeat the execution of each unit 10 times
    loops = 10;
    disableAspectRatio = true;

    # Resources
    qos = "debug";
    time = "02:00:00";
    cpusPerTask = hw.cpusPerSocket;
    nodes = totalProcs / ntasksPerNode;
    jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
  };
# Compute the array of configurations from varConf and genConf
configs = stdexp.buildConfigs {
  inherit genConf;
  inherit varConf;
};
# Input stage obtained from genInput for those configurations
input = genInput configs;
# Stage that builds the HPCG command line from the unit configuration.
exec = {nextStage, conf, ...}:
  let
    # Render one "--name=value" option.
    opt = name: value: "--${name}=${toString value}";
  in stages.exec {
    inherit nextStage;
    argv = [
      # Size per process
      (opt "nx" conf.n.x)
      (opt "ny" conf.n.y)
      (opt "nz" conf.n.z)
      # Process layout on each axis
      (opt "npx" conf.nprocs.x)
      (opt "npy" conf.nprocs.y)
      (opt "npz" conf.nprocs.z)
      (opt "nblocks" conf.nblocks)
      (opt "ncomms" conf.ncommblocks)
      # The input symlink is generated by the input stage, which is generated
      # by the genInput function.
      "--load=input"
      # Disable HPCG Aspect Ratio to run any mpi layout
    ] ++ (if conf.disableAspectRatio then [ "--no-ar=1" ] else [ ]);
  };
# Last stage: the bsc.apps.hpcg package overridden with the unit's gitBranch.
program = {nextStage, conf, ...}:
  bsc.apps.hpcg.override {
    gitBranch = conf.gitBranch;
  };
# Standard stages followed by the input, argument setup and program stages.
pipeline =
  let
    hpcgStages = [ input exec program ];
  in stdexp.stdPipeline ++ hpcgStages;
in
  stdexp.genExperiment {
    inherit pipeline;
    inherit configs;
  }

68
garlic/exp/hpcg/ss.nix Normal file
View File

@ -0,0 +1,68 @@
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, garlicTools
, callPackage
, enableExtended ? false
}:
with stdenv.lib;
with garlicTools;
let
common = callPackage ./common.nix { };
inherit (common) pipeline getSizePerTask;
# Initial variable configuration (range2 is provided by garlicTools)
varConf =
  let
    # The extended experiment sweeps blocksPerCpu; the default one uses 4 only.
    blocksPerCpuChoices =
      if enableExtended
      then range2 1 8
      else [ 4 ];
  in {
    nodes = range2 1 16;
    blocksPerCpu = blocksPerCpuChoices;
    gitBranch = [ "garlic/tampi+isend+oss+task" ];
  };
# Generate the complete configuration for each unit, on top of the target
# machine configuration.
genConf = c:
  let
    inherit (targetMachine.config) hw;
    # hpcg options taken from the variable configuration
    inherit (c) nodes blocksPerCpu gitBranch;

    expName = "hpcg-ss";
    unitName =
      "${expName}-nodes${toString nodes}-bpc${toString blocksPerCpu}";

    # Resources: one task per socket, using all the CPUs of the socket
    cpusPerTask = hw.cpusPerSocket;
    ntasksPerNode = hw.socketsPerNode;
    totalTasks = ntasksPerNode * nodes;

    # The z size is split among all tasks. This is an integer division, so
    # totalTasks is expected to divide 128 (assumption -- confirm).
    sizePerCpu = { x = 2; y = 2; z = 128 / totalTasks; };
  in targetMachine.config // {
    inherit expName unitName hw;
    inherit nodes blocksPerCpu gitBranch;
    inherit cpusPerTask ntasksPerNode totalTasks sizePerCpu;

    sizePerTask = getSizePerTask cpusPerTask sizePerCpu;
    # All the tasks are placed along the z axis
    nprocs = { x = 1; y = 1; z = totalTasks; };
    nblocks = blocksPerCpu * cpusPerTask;
    ncomms = 1;
    disableAspectRatio = true;

    # Repeat the execution of each unit several times
    loops = 10;

    qos = "debug";
    time = "02:00:00";
    jobName = unitName;
  };
# Compute the array of configurations from varConf and genConf
configs = stdexp.buildConfigs {
  inherit genConf;
  inherit varConf;
};
in
  # The experiment runs the pipeline shared through common.nix on every
  # configuration.
  stdexp.genExperiment {
    inherit pipeline;
    inherit configs;
  }

View File

@ -65,9 +65,9 @@
inherit genInput;
};
ossSlicesStrongscaling = callPackage ./hpcg/oss.slices.strongscaling.nix {
inherit genInput;
};
ss = callPackage ./hpcg/ss.nix { };
big.ss = ss.override { enableExtended = true; };
};
heat = rec {