hpcg: refactor ss and gen using a common file
- The file gen.nix now provides one experiment per unit, to reduce the evaluation time.
- The pipeline is specified only in the common.nix file.
- The input dataset path is no longer symlinked; it is passed in the "--load" argument.
- The size option is renamed from "n" to "sizePerTask".
This commit is contained in:
parent
9bb570af7f
commit
b4e37a15a9
72
garlic/exp/hpcg/common.nix
Normal file
72
garlic/exp/hpcg/common.nix
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
{
|
||||||
|
stdenv
|
||||||
|
, stdexp
|
||||||
|
, bsc
|
||||||
|
, stages
|
||||||
|
, callPackage
|
||||||
|
}:
|
||||||
|
|
||||||
|
with stdenv.lib;
|
||||||
|
|
||||||
|
rec {
|
||||||
|
|
||||||
|
checkInput = {nextStage, conf, ...}: stages.exec {
|
||||||
|
inherit nextStage;
|
||||||
|
pre = optionalString (! (conf.enableGen or false)) (
|
||||||
|
let
|
||||||
|
gen = callPackage ./gen.nix { };
|
||||||
|
inputTre = gen.getInputTre conf;
|
||||||
|
exp = inputTre.experiment;
|
||||||
|
unit = elemAt exp.units 0;
|
||||||
|
expName = baseNameOf (toString exp);
|
||||||
|
unitName = baseNameOf (toString unit);
|
||||||
|
inputPath = "$GARLIC_OUT/${expName}/${unitName}/1";
|
||||||
|
in
|
||||||
|
''
|
||||||
|
# Force the generation of the input resultTree as a dependency:
|
||||||
|
# ${toString inputTre.result}
|
||||||
|
|
||||||
|
# Ensure the input dataset is still available
|
||||||
|
export HPCG_INPUT_PATH="${toString inputPath}"
|
||||||
|
|
||||||
|
if [ ! -e "$HPCG_INPUT_PATH" ]; then
|
||||||
|
>&2 echo "Missing input dataset: $HPCG_INPUT_PATH"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
''
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
getSizePerTask = cpusPerTask: sizePerCpu:
|
||||||
|
mapAttrs (name: val: val * cpusPerTask) sizePerCpu;
|
||||||
|
|
||||||
|
exec = {nextStage, conf, ...}: let
|
||||||
|
actionArg = if (conf.enableGen or false)
|
||||||
|
then "--store=."
|
||||||
|
else "--load=\"$HPCG_INPUT_PATH\"";
|
||||||
|
|
||||||
|
in stages.exec {
|
||||||
|
inherit nextStage;
|
||||||
|
argv = [
|
||||||
|
"--nx=${toString conf.sizePerTask.x}"
|
||||||
|
"--ny=${toString conf.sizePerTask.y}"
|
||||||
|
"--nz=${toString conf.sizePerTask.z}"
|
||||||
|
"--npx=${toString conf.nprocs.x}"
|
||||||
|
"--npy=${toString conf.nprocs.y}"
|
||||||
|
"--npz=${toString conf.nprocs.z}"
|
||||||
|
"--nblocks=${toString conf.nblocks}"
|
||||||
|
"--ncomms=${toString conf.ncomms}"
|
||||||
|
# The input dataset is either generated in place (--store) or loaded from
|
||||||
|
# the path exported by the checkInput stage (--load); see actionArg.
|
||||||
|
actionArg
|
||||||
|
] ++ optional (conf.disableAspectRatio or false) "--no-ar=1";
|
||||||
|
};
|
||||||
|
|
||||||
|
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
|
||||||
|
inherit (conf) gitBranch;
|
||||||
|
};
|
||||||
|
|
||||||
|
pipeline = stdexp.stdPipeline ++ [
|
||||||
|
checkInput
|
||||||
|
exec program ];
|
||||||
|
}
|
@ -5,95 +5,45 @@
|
|||||||
, targetMachine
|
, targetMachine
|
||||||
, stages
|
, stages
|
||||||
, garlicTools
|
, garlicTools
|
||||||
|
, callPackage
|
||||||
}:
|
}:
|
||||||
|
|
||||||
with stdenv.lib;
|
with stdenv.lib;
|
||||||
with builtins;
|
|
||||||
with garlicTools;
|
with garlicTools;
|
||||||
|
|
||||||
let
|
rec {
|
||||||
|
|
||||||
# Generate the complete configuration for each unit
|
# Generate the complete configuration for each unit
|
||||||
genConf = c: targetMachine.config // rec {
|
genConf = c: targetMachine.config // rec {
|
||||||
expName = "${c.expName}.gen";
|
expName = "${c.expName}.gen";
|
||||||
unitName = "${expName}.n${toString n.x}";
|
unitName = "${c.unitName}.gen";
|
||||||
|
|
||||||
inherit (targetMachine.config) hw;
|
inherit (targetMachine.config) hw;
|
||||||
|
|
||||||
# Only the n and gitBranch options are inherited
|
# Inherit options from the current conf
|
||||||
inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
|
inherit (c) sizePerTask nprocs disableAspectRatio gitBranch
|
||||||
|
cpusPerTask ntasksPerNode nodes;
|
||||||
|
|
||||||
# Repeat the execution of each unit 30 times
|
# nblocks and ncomms are ignored from c
|
||||||
|
ncomms = 1;
|
||||||
|
nblocks = 1;
|
||||||
|
|
||||||
|
# We only need one run
|
||||||
loops = 1;
|
loops = 1;
|
||||||
|
|
||||||
|
# Generate the input
|
||||||
|
enableGen = true;
|
||||||
|
|
||||||
# Resources
|
# Resources
|
||||||
qos = "debug";
|
qos = "debug";
|
||||||
# ntasksPerNode = hw.socketsPerNode;
|
time = "02:00:00";
|
||||||
# nodes = 2;
|
|
||||||
time = "00:30:00";
|
|
||||||
# task in one socket
|
|
||||||
cpusPerTask = hw.cpusPerSocket;
|
|
||||||
jobName = unitName;
|
jobName = unitName;
|
||||||
};
|
};
|
||||||
|
|
||||||
exec = {nextStage, conf, ...}: with conf; stages.exec {
|
common = callPackage ./common.nix {};
|
||||||
inherit nextStage;
|
|
||||||
argv = [
|
getInputTre = conf: stdexp.genExperiment {
|
||||||
"--nx=${toString conf.n.x}"
|
configs = [ (genConf conf) ];
|
||||||
"--ny=${toString conf.n.y}"
|
pipeline = common.pipeline;
|
||||||
"--nz=${toString conf.n.z}"
|
|
||||||
"--npx=${toString conf.nprocs.x}"
|
|
||||||
"--npy=${toString conf.nprocs.y}"
|
|
||||||
"--npz=${toString conf.nprocs.z}"
|
|
||||||
# nblocks and ncomms are ignored
|
|
||||||
"--nblocks=1"
|
|
||||||
"--ncomms=1"
|
|
||||||
# Store the results in the same directory
|
|
||||||
"--store=."
|
|
||||||
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
|
|
||||||
};
|
};
|
||||||
|
}
|
||||||
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
|
|
||||||
inherit (conf) gitBranch;
|
|
||||||
};
|
|
||||||
|
|
||||||
pipeline = stdexp.stdPipeline ++ [ exec program ];
|
|
||||||
|
|
||||||
genExp = configs: stdexp.genExperiment { inherit configs pipeline; };
|
|
||||||
|
|
||||||
genInputLink = inputConfigs: {nextStage, conf, ...}:
|
|
||||||
let
|
|
||||||
# Compute the experiment that produces HPCG input matrix from the
|
|
||||||
# configuration of this unit:
|
|
||||||
configs = map genConf inputConfigs;
|
|
||||||
inputTre = genExp configs;
|
|
||||||
#inputExp = getExperimentStage inputTrebuchet;
|
|
||||||
#inputExp = trace inputTrebuchet inputTrebuchet.nextStage;
|
|
||||||
inputExp = getExperimentStage inputTre;
|
|
||||||
# Then load the result. This is only used to ensure that we have the
|
|
||||||
# results, so it has been executed.
|
|
||||||
inputRes = inputTre.result;
|
|
||||||
# We also need the unit, to compute the path.
|
|
||||||
inputUnit = stages.unit {
|
|
||||||
conf = genConf conf;
|
|
||||||
stages = pipeline;
|
|
||||||
};
|
|
||||||
# Build the path:
|
|
||||||
expName = baseNameOf (toString inputExp);
|
|
||||||
unitName = baseNameOf (toString inputUnit);
|
|
||||||
relPath = "../../${expName}/${unitName}/1";
|
|
||||||
in stages.exec {
|
|
||||||
inherit nextStage;
|
|
||||||
env = ''
|
|
||||||
# This line ensures that the results of the HPCG generation are complete:
|
|
||||||
# ${inputRes}
|
|
||||||
|
|
||||||
# Then we simply link the input result directory in "input"
|
|
||||||
# We use || true because all ranks will execute this and
|
|
||||||
# the execution will fail
|
|
||||||
ln -sf ${relPath} input || true
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
in
|
|
||||||
#{ inherit genConf genExp genInputLink; }
|
|
||||||
genInputLink
|
|
||||||
|
@ -1,112 +0,0 @@
|
|||||||
{
|
|
||||||
stdenv
|
|
||||||
, stdexp
|
|
||||||
, bsc
|
|
||||||
, targetMachine
|
|
||||||
, stages
|
|
||||||
, genInput
|
|
||||||
}:
|
|
||||||
|
|
||||||
with stdenv.lib;
|
|
||||||
|
|
||||||
let
|
|
||||||
# Initial variable configuration
|
|
||||||
varConf = {
|
|
||||||
n = [
|
|
||||||
{ x = 192 / 4; y = 192 / 4; z = 16 * 192; }
|
|
||||||
];
|
|
||||||
nprocs = [
|
|
||||||
# { x = 2; y = 1; z = 1; }
|
|
||||||
# { x = 4; y = 1; z = 1; }
|
|
||||||
# { x = 8; y = 1; z = 1; }
|
|
||||||
# { x = 16; y = 1; z = 1; }
|
|
||||||
# { x = 32; y = 1; z = 1; }
|
|
||||||
|
|
||||||
# { x = 1; y = 2; z = 1; }
|
|
||||||
# { x = 1; y = 4; z = 1; }
|
|
||||||
# { x = 1; y = 8; z = 1; }
|
|
||||||
# { x = 1; y = 16; z = 1; }
|
|
||||||
# { x = 1; y = 32; z = 1; }
|
|
||||||
|
|
||||||
{ x = 1; y = 1; z = 2; }
|
|
||||||
{ x = 1; y = 1; z = 4; }
|
|
||||||
{ x = 1; y = 1; z = 8; }
|
|
||||||
{ x = 1; y = 1; z = 16; }
|
|
||||||
{ x = 1; y = 1; z = 32; }
|
|
||||||
|
|
||||||
];
|
|
||||||
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
|
|
||||||
nblocks = [ 24 48 96 192 384 ];
|
|
||||||
ncommblocks = [ 1 ];
|
|
||||||
# nodes = [ 1 ];
|
|
||||||
# nodes = [ 1 2 4 8 16 ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Generate the complete configuration for each unit
|
|
||||||
genConf = c: targetMachine.config // rec {
|
|
||||||
expName = "hpcg.oss";
|
|
||||||
unitName = "${expName}.nb${toString nblocks}";
|
|
||||||
|
|
||||||
inherit (targetMachine.config) hw;
|
|
||||||
|
|
||||||
# hpcg options
|
|
||||||
inherit (c) nprocs nblocks ncommblocks;
|
|
||||||
|
|
||||||
n = {
|
|
||||||
x = c.n.x / nprocs.x;
|
|
||||||
y = c.n.y / nprocs.y;
|
|
||||||
z = c.n.z / nprocs.z;
|
|
||||||
};
|
|
||||||
|
|
||||||
gitBranch = "garlic/tampi+isend+oss+task";
|
|
||||||
|
|
||||||
# Repeat the execution of each unit 30 times
|
|
||||||
loops = 10;
|
|
||||||
|
|
||||||
disableAspectRatio = true;
|
|
||||||
|
|
||||||
# Resources
|
|
||||||
qos = "debug";
|
|
||||||
ntasksPerNode = hw.socketsPerNode;
|
|
||||||
time = "02:00:00";
|
|
||||||
# task in one socket
|
|
||||||
cpusPerTask = hw.cpusPerSocket;
|
|
||||||
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
|
|
||||||
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Compute the array of configurations
|
|
||||||
configs = stdexp.buildConfigs {
|
|
||||||
inherit varConf genConf;
|
|
||||||
};
|
|
||||||
|
|
||||||
input = genInput configs;
|
|
||||||
|
|
||||||
exec = {nextStage, conf, ...}: stages.exec {
|
|
||||||
inherit nextStage;
|
|
||||||
argv = [
|
|
||||||
"--nx=${toString conf.n.x}"
|
|
||||||
"--ny=${toString conf.n.y}"
|
|
||||||
"--nz=${toString conf.n.z}"
|
|
||||||
# Distribute all processes in X axis
|
|
||||||
"--npx=${toString conf.nprocs.x}"
|
|
||||||
"--npy=${toString conf.nprocs.y}"
|
|
||||||
"--npz=${toString conf.nprocs.z}"
|
|
||||||
"--nblocks=${toString conf.nblocks}"
|
|
||||||
"--ncomms=${toString conf.ncommblocks}"
|
|
||||||
# The input symlink is generated by the input stage, which is generated by
|
|
||||||
# the genInput function.
|
|
||||||
"--load=input"
|
|
||||||
# Disable HPCG Aspect Ratio to run any mpi layout
|
|
||||||
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
|
|
||||||
};
|
|
||||||
|
|
||||||
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
|
|
||||||
inherit (conf) gitBranch;
|
|
||||||
};
|
|
||||||
|
|
||||||
pipeline = stdexp.stdPipeline ++ [ input exec program ];
|
|
||||||
|
|
||||||
in
|
|
||||||
|
|
||||||
stdexp.genExperiment { inherit configs pipeline; }
|
|
68
garlic/exp/hpcg/ss.nix
Normal file
68
garlic/exp/hpcg/ss.nix
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
{
|
||||||
|
stdenv
|
||||||
|
, stdexp
|
||||||
|
, bsc
|
||||||
|
, targetMachine
|
||||||
|
, stages
|
||||||
|
, garlicTools
|
||||||
|
, callPackage
|
||||||
|
, enableExtended ? false
|
||||||
|
}:
|
||||||
|
|
||||||
|
with stdenv.lib;
|
||||||
|
with garlicTools;
|
||||||
|
|
||||||
|
let
|
||||||
|
common = callPackage ./common.nix { };
|
||||||
|
|
||||||
|
inherit (common) pipeline getSizePerTask;
|
||||||
|
|
||||||
|
# Initial variable configuration
|
||||||
|
varConf = {
|
||||||
|
nodes = range2 1 16;
|
||||||
|
blocksPerCpu = if (enableExtended)
|
||||||
|
then range2 1 8
|
||||||
|
else [ 4 ];
|
||||||
|
gitBranch = [
|
||||||
|
"garlic/tampi+isend+oss+task"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
# Generate the complete configuration for each unit
|
||||||
|
genConf = c: targetMachine.config // rec {
|
||||||
|
expName = "hpcg-ss";
|
||||||
|
unitName = "${expName}"
|
||||||
|
+ "-nodes${toString nodes}"
|
||||||
|
+ "-bpc${toString blocksPerCpu}";
|
||||||
|
|
||||||
|
inherit (targetMachine.config) hw;
|
||||||
|
|
||||||
|
# hpcg options
|
||||||
|
inherit (c) nodes blocksPerCpu gitBranch;
|
||||||
|
totalTasks = ntasksPerNode * nodes;
|
||||||
|
sizePerCpu = { x=2; y=2; z=128 / totalTasks; };
|
||||||
|
sizePerTask = getSizePerTask cpusPerTask sizePerCpu;
|
||||||
|
nprocs = { x=1; y=1; z=totalTasks; };
|
||||||
|
nblocks = blocksPerCpu * cpusPerTask;
|
||||||
|
ncomms = 1;
|
||||||
|
disableAspectRatio = true;
|
||||||
|
|
||||||
|
# Repeat the execution of each unit several times
|
||||||
|
loops = 10;
|
||||||
|
|
||||||
|
# Resources
|
||||||
|
qos = "debug";
|
||||||
|
time = "02:00:00";
|
||||||
|
cpusPerTask = hw.cpusPerSocket;
|
||||||
|
ntasksPerNode = hw.socketsPerNode;
|
||||||
|
jobName = unitName;
|
||||||
|
};
|
||||||
|
|
||||||
|
# Compute the array of configurations
|
||||||
|
configs = stdexp.buildConfigs {
|
||||||
|
inherit varConf genConf;
|
||||||
|
};
|
||||||
|
|
||||||
|
in
|
||||||
|
|
||||||
|
stdexp.genExperiment { inherit configs pipeline; }
|
@ -65,9 +65,9 @@
|
|||||||
inherit genInput;
|
inherit genInput;
|
||||||
};
|
};
|
||||||
|
|
||||||
ossSlicesStrongscaling = callPackage ./hpcg/oss.slices.strongscaling.nix {
|
ss = callPackage ./hpcg/ss.nix { };
|
||||||
inherit genInput;
|
|
||||||
};
|
big.ss = ss.override { enableExtended = true; };
|
||||||
};
|
};
|
||||||
|
|
||||||
heat = rec {
|
heat = rec {
|
||||||
|
Loading…
Reference in New Issue
Block a user