hpcg: refactor ss and gen using a common file
- The file gen.nix now provides one experiment per unit, to reduce the evaluation time.
- The pipeline is now specified only in the common.nix file.
- The input dataset path is no longer symlinked; it is passed in the "--load" argument instead.
- The size option is renamed from "n" to "sizePerTask".
This commit is contained in:
parent
9bb570af7f
commit
b4e37a15a9
72
garlic/exp/hpcg/common.nix
Normal file
72
garlic/exp/hpcg/common.nix
Normal file
@ -0,0 +1,72 @@
|
||||
{
|
||||
stdenv
|
||||
, stdexp
|
||||
, bsc
|
||||
, stages
|
||||
, callPackage
|
||||
}:
|
||||
|
||||
with stdenv.lib;
|
||||
|
||||
rec {
|
||||
|
||||
# Stage that checks that the HPCG input dataset is present before the
# benchmark runs.
#
# When conf.enableGen is true (the unit itself generates the dataset) the
# "pre" script is empty. Otherwise the generation experiment for this conf is
# obtained from gen.nix, the dataset path is exported as HPCG_INPUT_PATH and
# the script exits with status 1 if that path does not exist.
#
# NOTE(review): $GARLIC_OUT is presumably set in the runtime environment;
# it is not defined in this file.
checkInput = {nextStage, conf, ...}:
let
  generating = conf.enableGen or false;

  # Experiment that produces the input dataset for this configuration
  inputTre = (callPackage ./gen.nix { }).getInputTre conf;
  genExp = inputTre.experiment;

  # Only the first unit of the generation experiment is used
  genUnit = head genExp.units;

  # Output directory of run "1" of the generation unit
  datasetDir =
    "$GARLIC_OUT/${baseNameOf (toString genExp)}/${baseNameOf (toString genUnit)}/1";

  checkScript = ''
    # Force the generation of the input resultTree as a dependency:
    # ${toString inputTre.result}

    # Ensure the input dataset is still available
    export HPCG_INPUT_PATH="${datasetDir}"

    if [ ! -e "$HPCG_INPUT_PATH" ]; then
      >&2 echo "Missing input dataset: $HPCG_INPUT_PATH"
      exit 1
    fi
  '';
in
stages.exec {
  inherit nextStage;
  # The let bindings are lazy: gen.nix is only evaluated when not generating
  pre = if generating then "" else checkScript;
};
|
||||
|
||||
# Scale every attribute of sizePerCpu by cpusPerTask, e.g. with
# cpusPerTask = 2: { x = 1; y = 2; z = 3; } -> { x = 2; y = 4; z = 6; }
getSizePerTask = cpusPerTask: sizePerCpu:
  mapAttrs (_: perCpu: perCpu * cpusPerTask) sizePerCpu;
|
||||
|
||||
# Stage that builds the HPCG command line from the unit configuration.
#
# Reads conf.sizePerTask.{x,y,z}, conf.nprocs.{x,y,z}, conf.nblocks and
# conf.ncomms, plus the optional conf.enableGen and conf.disableAspectRatio
# (both default to false).
exec = {nextStage, conf, ...}:
let
  # Render a single "--name=value" option
  opt = name: value: "--${name}=${toString value}";

  generating = conf.enableGen or false;
  noAspectRatio = conf.disableAspectRatio or false;

  # A generation unit stores the dataset in the current directory; any other
  # unit loads it from the path exported as HPCG_INPUT_PATH by the checkInput
  # stage (the dataset is no longer reached through an "input" symlink).
  datasetArg =
    if generating
    then "--store=."
    else "--load=\"$HPCG_INPUT_PATH\"";
in
stages.exec {
  inherit nextStage;
  argv = [
    (opt "nx" conf.sizePerTask.x)
    (opt "ny" conf.sizePerTask.y)
    (opt "nz" conf.sizePerTask.z)
    (opt "npx" conf.nprocs.x)
    (opt "npy" conf.nprocs.y)
    (opt "npz" conf.nprocs.z)
    (opt "nblocks" conf.nblocks)
    (opt "ncomms" conf.ncomms)
    datasetArg
  ] ++ (if noAspectRatio then [ "--no-ar=1" ] else [ ]);
};
|
||||
|
||||
# Final stage: the HPCG application overridden with the git branch selected
# in the unit configuration.
program = {nextStage, conf, ...}:
  bsc.apps.hpcg.override {
    gitBranch = conf.gitBranch;
  };
|
||||
|
||||
# Complete pipeline: the standard stages followed by the HPCG specific ones,
# in this order: input check, execution arguments, program.
pipeline =
  let
    hpcgStages = [ checkInput exec program ];
  in
    stdexp.stdPipeline ++ hpcgStages;
|
||||
}
|
@ -5,95 +5,45 @@
|
||||
, targetMachine
|
||||
, stages
|
||||
, garlicTools
|
||||
, callPackage
|
||||
}:
|
||||
|
||||
with stdenv.lib;
|
||||
with builtins;
|
||||
with garlicTools;
|
||||
|
||||
let
|
||||
rec {
|
||||
|
||||
# Generate the complete configuration for each unit
|
||||
genConf = c: targetMachine.config // rec {
|
||||
expName = "${c.expName}.gen";
|
||||
unitName = "${expName}.n${toString n.x}";
|
||||
unitName = "${c.unitName}.gen";
|
||||
|
||||
inherit (targetMachine.config) hw;
|
||||
|
||||
# Only the n and gitBranch options are inherited
|
||||
inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
|
||||
# Inherit options from the current conf
|
||||
inherit (c) sizePerTask nprocs disableAspectRatio gitBranch
|
||||
cpusPerTask ntasksPerNode nodes;
|
||||
|
||||
# Repeat the execution of each unit 30 times
|
||||
# nblocks and ncomms are ignored from c
|
||||
ncomms = 1;
|
||||
nblocks = 1;
|
||||
|
||||
# We only need one run
|
||||
loops = 1;
|
||||
|
||||
# Generate the input
|
||||
enableGen = true;
|
||||
|
||||
# Resources
|
||||
qos = "debug";
|
||||
# ntasksPerNode = hw.socketsPerNode;
|
||||
# nodes = 2;
|
||||
time = "00:30:00";
|
||||
# task in one socket
|
||||
cpusPerTask = hw.cpusPerSocket;
|
||||
time = "02:00:00";
|
||||
jobName = unitName;
|
||||
};
|
||||
|
||||
exec = {nextStage, conf, ...}: with conf; stages.exec {
|
||||
inherit nextStage;
|
||||
argv = [
|
||||
"--nx=${toString conf.n.x}"
|
||||
"--ny=${toString conf.n.y}"
|
||||
"--nz=${toString conf.n.z}"
|
||||
"--npx=${toString conf.nprocs.x}"
|
||||
"--npy=${toString conf.nprocs.y}"
|
||||
"--npz=${toString conf.nprocs.z}"
|
||||
# nblocks and ncomms are ignored
|
||||
"--nblocks=1"
|
||||
"--ncomms=1"
|
||||
# Store the results in the same directory
|
||||
"--store=."
|
||||
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
|
||||
common = callPackage ./common.nix {};
|
||||
|
||||
getInputTre = conf: stdexp.genExperiment {
|
||||
configs = [ (genConf conf) ];
|
||||
pipeline = common.pipeline;
|
||||
};
|
||||
|
||||
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
|
||||
inherit (conf) gitBranch;
|
||||
};
|
||||
|
||||
pipeline = stdexp.stdPipeline ++ [ exec program ];
|
||||
|
||||
genExp = configs: stdexp.genExperiment { inherit configs pipeline; };
|
||||
|
||||
genInputLink = inputConfigs: {nextStage, conf, ...}:
|
||||
let
|
||||
# Compute the experiment that produces HPCG input matrix from the
|
||||
# configuration of this unit:
|
||||
configs = map genConf inputConfigs;
|
||||
inputTre = genExp configs;
|
||||
#inputExp = getExperimentStage inputTrebuchet;
|
||||
#inputExp = trace inputTrebuchet inputTrebuchet.nextStage;
|
||||
inputExp = getExperimentStage inputTre;
|
||||
# Then load the result. This is only used to ensure that we have the
|
||||
# results, so it has been executed.
|
||||
inputRes = inputTre.result;
|
||||
# We also need the unit, to compute the path.
|
||||
inputUnit = stages.unit {
|
||||
conf = genConf conf;
|
||||
stages = pipeline;
|
||||
};
|
||||
# Build the path:
|
||||
expName = baseNameOf (toString inputExp);
|
||||
unitName = baseNameOf (toString inputUnit);
|
||||
relPath = "../../${expName}/${unitName}/1";
|
||||
in stages.exec {
|
||||
inherit nextStage;
|
||||
env = ''
|
||||
# This line ensures that the results of the HPCG generation are complete:
|
||||
# ${inputRes}
|
||||
|
||||
# Then we simply link the input result directory in "input"
|
||||
# We use || true because all ranks will execute this and
|
||||
# the execution will fail
|
||||
ln -sf ${relPath} input || true
|
||||
'';
|
||||
};
|
||||
|
||||
in
|
||||
#{ inherit genConf genExp genInputLink; }
|
||||
genInputLink
|
||||
}
|
||||
|
@ -1,112 +0,0 @@
|
||||
{
|
||||
stdenv
|
||||
, stdexp
|
||||
, bsc
|
||||
, targetMachine
|
||||
, stages
|
||||
, genInput
|
||||
}:
|
||||
|
||||
with stdenv.lib;
|
||||
|
||||
let
|
||||
# Initial variable configuration
|
||||
varConf = {
|
||||
n = [
|
||||
{ x = 192 / 4; y = 192 / 4; z = 16 * 192; }
|
||||
];
|
||||
nprocs = [
|
||||
# { x = 2; y = 1; z = 1; }
|
||||
# { x = 4; y = 1; z = 1; }
|
||||
# { x = 8; y = 1; z = 1; }
|
||||
# { x = 16; y = 1; z = 1; }
|
||||
# { x = 32; y = 1; z = 1; }
|
||||
|
||||
# { x = 1; y = 2; z = 1; }
|
||||
# { x = 1; y = 4; z = 1; }
|
||||
# { x = 1; y = 8; z = 1; }
|
||||
# { x = 1; y = 16; z = 1; }
|
||||
# { x = 1; y = 32; z = 1; }
|
||||
|
||||
{ x = 1; y = 1; z = 2; }
|
||||
{ x = 1; y = 1; z = 4; }
|
||||
{ x = 1; y = 1; z = 8; }
|
||||
{ x = 1; y = 1; z = 16; }
|
||||
{ x = 1; y = 1; z = 32; }
|
||||
|
||||
];
|
||||
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
|
||||
nblocks = [ 24 48 96 192 384 ];
|
||||
ncommblocks = [ 1 ];
|
||||
# nodes = [ 1 ];
|
||||
# nodes = [ 1 2 4 8 16 ];
|
||||
};
|
||||
|
||||
# Generate the complete configuration for each unit
|
||||
genConf = c: targetMachine.config // rec {
|
||||
expName = "hpcg.oss";
|
||||
unitName = "${expName}.nb${toString nblocks}";
|
||||
|
||||
inherit (targetMachine.config) hw;
|
||||
|
||||
# hpcg options
|
||||
inherit (c) nprocs nblocks ncommblocks;
|
||||
|
||||
n = {
|
||||
x = c.n.x / nprocs.x;
|
||||
y = c.n.y / nprocs.y;
|
||||
z = c.n.z / nprocs.z;
|
||||
};
|
||||
|
||||
gitBranch = "garlic/tampi+isend+oss+task";
|
||||
|
||||
# Repeat the execution of each unit 10 times
|
||||
loops = 10;
|
||||
|
||||
disableAspectRatio = true;
|
||||
|
||||
# Resources
|
||||
qos = "debug";
|
||||
ntasksPerNode = hw.socketsPerNode;
|
||||
time = "02:00:00";
|
||||
# task in one socket
|
||||
cpusPerTask = hw.cpusPerSocket;
|
||||
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
|
||||
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
|
||||
};
|
||||
|
||||
# Compute the array of configurations
|
||||
configs = stdexp.buildConfigs {
|
||||
inherit varConf genConf;
|
||||
};
|
||||
|
||||
input = genInput configs;
|
||||
|
||||
exec = {nextStage, conf, ...}: stages.exec {
|
||||
inherit nextStage;
|
||||
argv = [
|
||||
"--nx=${toString conf.n.x}"
|
||||
"--ny=${toString conf.n.y}"
|
||||
"--nz=${toString conf.n.z}"
|
||||
# Distribute all processes in X axis
|
||||
"--npx=${toString conf.nprocs.x}"
|
||||
"--npy=${toString conf.nprocs.y}"
|
||||
"--npz=${toString conf.nprocs.z}"
|
||||
"--nblocks=${toString conf.nblocks}"
|
||||
"--ncomms=${toString conf.ncommblocks}"
|
||||
# The input symlink is generated by the input stage, which is generated by
|
||||
# the genInput function.
|
||||
"--load=input"
|
||||
# Disable HPCG Aspect Ratio to run any mpi layout
|
||||
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
|
||||
};
|
||||
|
||||
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
|
||||
inherit (conf) gitBranch;
|
||||
};
|
||||
|
||||
pipeline = stdexp.stdPipeline ++ [ input exec program ];
|
||||
|
||||
in
|
||||
|
||||
stdexp.genExperiment { inherit configs pipeline; }
|
68
garlic/exp/hpcg/ss.nix
Normal file
68
garlic/exp/hpcg/ss.nix
Normal file
@ -0,0 +1,68 @@
|
||||
{
|
||||
stdenv
|
||||
, stdexp
|
||||
, bsc
|
||||
, targetMachine
|
||||
, stages
|
||||
, garlicTools
|
||||
, callPackage
|
||||
, enableExtended ? false
|
||||
}:
|
||||
|
||||
with stdenv.lib;
|
||||
with garlicTools;
|
||||
|
||||
let
  # Shared HPCG pipeline and helpers
  inherit (callPackage ./common.nix { }) pipeline getSizePerTask;

  # Initial variable configuration: one unit is generated for each
  # combination of these values.
  varConf = {
    nodes = range2 1 16;

    # The extended experiment sweeps the blocks per CPU; the default one
    # uses only 4.
    blocksPerCpu =
      if enableExtended
      then range2 1 8
      else [ 4 ];

    gitBranch = [ "garlic/tampi+isend+oss+task" ];
  };

  # Generate the complete configuration for one unit from the variable
  # configuration c, on top of the target machine configuration.
  genConf = c:
    let
      inherit (targetMachine.config) hw;
      inherit (c) nodes blocksPerCpu gitBranch;

      expName = "hpcg-ss";
      unitName = "${expName}-nodes${toString nodes}-bpc${toString blocksPerCpu}";

      # Resources: one task per socket, using all the CPUs of the socket
      cpusPerTask = hw.cpusPerSocket;
      ntasksPerNode = hw.socketsPerNode;
      totalTasks = ntasksPerNode * nodes;

      # The z size per CPU is divided among all the tasks.
      # NOTE(review): integer division, presumably totalTasks is expected to
      # divide 128 exactly -- confirm.
      sizePerCpu = { x = 2; y = 2; z = 128 / totalTasks; };
    in
      targetMachine.config // {
        inherit expName unitName hw;

        # hpcg options
        inherit nodes blocksPerCpu gitBranch totalTasks sizePerCpu;
        sizePerTask = getSizePerTask cpusPerTask sizePerCpu;
        nprocs = { x = 1; y = 1; z = totalTasks; };
        nblocks = blocksPerCpu * cpusPerTask;
        ncomms = 1;
        disableAspectRatio = true;

        # Repeat the execution of each unit several times
        loops = 10;

        # Resources
        qos = "debug";
        time = "02:00:00";
        inherit cpusPerTask ntasksPerNode;
        jobName = unitName;
      };

in

stdexp.genExperiment {
  inherit pipeline;

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };
}
|
@ -65,9 +65,9 @@
|
||||
inherit genInput;
|
||||
};
|
||||
|
||||
ossSlicesStrongscaling = callPackage ./hpcg/oss.slices.strongscaling.nix {
|
||||
inherit genInput;
|
||||
};
|
||||
ss = callPackage ./hpcg/ss.nix { };
|
||||
|
||||
big.ss = ss.override { enableExtended = true; };
|
||||
};
|
||||
|
||||
heat = rec {
|
||||
|
Loading…
Reference in New Issue
Block a user