From b4e37a15a9ae8f60955313ffead981c3f3fcd812 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Apr 2021 11:51:34 +0200 Subject: [PATCH] hpcg: refactor ss and gen using a common file - The file gen.nix now provides an experiment for each unit, to reduce the evaluation time. - The pipeline is specified in the common.nix file only. - The input dataset path is no longer symlinked, but is specified in the "--load" argument. - The size is renamed to "sizePerTask" instead of "n". --- garlic/exp/hpcg/common.nix | 72 ++++++++++++ garlic/exp/hpcg/gen.nix | 94 ++++------------ garlic/exp/hpcg/oss.slices.strongscaling.nix | 112 ------------------- garlic/exp/hpcg/ss.nix | 68 +++++++++++ garlic/exp/index.nix | 6 +- 5 files changed, 165 insertions(+), 187 deletions(-) create mode 100644 garlic/exp/hpcg/common.nix delete mode 100644 garlic/exp/hpcg/oss.slices.strongscaling.nix create mode 100644 garlic/exp/hpcg/ss.nix diff --git a/garlic/exp/hpcg/common.nix b/garlic/exp/hpcg/common.nix new file mode 100644 index 0000000..6bc5275 --- /dev/null +++ b/garlic/exp/hpcg/common.nix @@ -0,0 +1,72 @@ +{ + stdenv +, stdexp +, bsc +, stages +, callPackage +}: + +with stdenv.lib; + +rec { + + checkInput = {nextStage, conf, ...}: stages.exec { + inherit nextStage; + pre = optionalString (! (conf.enableGen or false)) ( + let + gen = callPackage ./gen.nix { }; + inputTre = gen.getInputTre conf; + exp = inputTre.experiment; + unit = elemAt exp.units 0; + expName = baseNameOf (toString exp); + unitName = baseNameOf (toString unit); + inputPath = "$GARLIC_OUT/${expName}/${unitName}/1"; + in + '' + # Force the generation of the input resultTree as a dependency: + # ${toString inputTre.result} + + # Ensure the input dataset is still available + export HPCG_INPUT_PATH="${toString inputPath}" + + if [ ! 
-e "$HPCG_INPUT_PATH" ]; then + >&2 echo "Missing input dataset: $HPCG_INPUT_PATH" + exit 1 + fi + '' + ); + }; + + getSizePerTask = cpusPerTask: sizePerCpu: + mapAttrs (name: val: val * cpusPerTask) sizePerCpu; + + exec = {nextStage, conf, ...}: let + actionArg = if (conf.enableGen or false) + then "--store=." + else "--load=\"$HPCG_INPUT_PATH\""; + + in stages.exec { + inherit nextStage; + argv = [ + "--nx=${toString conf.sizePerTask.x}" + "--ny=${toString conf.sizePerTask.y}" + "--nz=${toString conf.sizePerTask.z}" + "--npx=${toString conf.nprocs.x}" + "--npy=${toString conf.nprocs.y}" + "--npz=${toString conf.nprocs.z}" + "--nblocks=${toString conf.nblocks}" + "--ncomms=${toString conf.ncomms}" + # Either store the generated input in the current directory (enableGen) or + # load it from the path exported as HPCG_INPUT_PATH by the checkInput stage. + actionArg + ] ++ optional (conf.disableAspectRatio or false) "--no-ar=1"; + }; + + program = {nextStage, conf, ...}: bsc.apps.hpcg.override { + inherit (conf) gitBranch; + }; + + pipeline = stdexp.stdPipeline ++ [ + checkInput + exec program ]; +} diff --git a/garlic/exp/hpcg/gen.nix b/garlic/exp/hpcg/gen.nix index a9e7316..f91962f 100644 --- a/garlic/exp/hpcg/gen.nix +++ b/garlic/exp/hpcg/gen.nix @@ -5,95 +5,45 @@ , targetMachine , stages , garlicTools +, callPackage }: with stdenv.lib; -with builtins; with garlicTools; -let +rec { + # Generate the complete configuration for each unit genConf = c: targetMachine.config // rec { expName = "${c.expName}.gen"; - unitName = "${expName}.n${toString n.x}"; + unitName = "${c.unitName}.gen"; inherit (targetMachine.config) hw; - # Only the n and gitBranch options are inherited - inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch; + # Inherit options from the current conf + inherit (c) sizePerTask nprocs disableAspectRatio gitBranch + cpusPerTask ntasksPerNode nodes; - # Repeat the execution of each unit 30 times + # nblocks and ncomms are ignored from c + ncomms = 1; + nblocks = 1; + + # We only need one
run loops = 1; + # Generate the input + enableGen = true; + # Resources qos = "debug"; - # ntasksPerNode = hw.socketsPerNode; - # nodes = 2; - time = "00:30:00"; - # task in one socket - cpusPerTask = hw.cpusPerSocket; + time = "02:00:00"; jobName = unitName; }; - exec = {nextStage, conf, ...}: with conf; stages.exec { - inherit nextStage; - argv = [ - "--nx=${toString conf.n.x}" - "--ny=${toString conf.n.y}" - "--nz=${toString conf.n.z}" - "--npx=${toString conf.nprocs.x}" - "--npy=${toString conf.nprocs.y}" - "--npz=${toString conf.nprocs.z}" - # nblocks and ncomms are ignored - "--nblocks=1" - "--ncomms=1" - # Store the results in the same directory - "--store=." - ] ++ optional (conf.disableAspectRatio) "--no-ar=1"; + common = callPackage ./common.nix {}; + + getInputTre = conf: stdexp.genExperiment { + configs = [ (genConf conf) ]; + pipeline = common.pipeline; }; - - program = {nextStage, conf, ...}: bsc.apps.hpcg.override { - inherit (conf) gitBranch; - }; - - pipeline = stdexp.stdPipeline ++ [ exec program ]; - - genExp = configs: stdexp.genExperiment { inherit configs pipeline; }; - - genInputLink = inputConfigs: {nextStage, conf, ...}: - let - # Compute the experiment that produces HPCG input matrix from the - # configuration of this unit: - configs = map genConf inputConfigs; - inputTre = genExp configs; - #inputExp = getExperimentStage inputTrebuchet; - #inputExp = trace inputTrebuchet inputTrebuchet.nextStage; - inputExp = getExperimentStage inputTre; - # Then load the result. This is only used to ensure that we have the - # results, so it has been executed. - inputRes = inputTre.result; - # We also need the unit, to compute the path. 
- inputUnit = stages.unit { - conf = genConf conf; - stages = pipeline; - }; - # Build the path: - expName = baseNameOf (toString inputExp); - unitName = baseNameOf (toString inputUnit); - relPath = "../../${expName}/${unitName}/1"; - in stages.exec { - inherit nextStage; - env = '' - # This line ensures that the results of the HPCG generation are complete: - # ${inputRes} - - # Then we simply link the input result directory in "input" - # We use || true because all ranks will execute this and - # the execution will fail - ln -sf ${relPath} input || true - ''; - }; - -in - #{ inherit genConf genExp genInputLink; } - genInputLink +} diff --git a/garlic/exp/hpcg/oss.slices.strongscaling.nix b/garlic/exp/hpcg/oss.slices.strongscaling.nix deleted file mode 100644 index c7cb46d..0000000 --- a/garlic/exp/hpcg/oss.slices.strongscaling.nix +++ /dev/null @@ -1,112 +0,0 @@ -{ - stdenv -, stdexp -, bsc -, targetMachine -, stages -, genInput -}: - -with stdenv.lib; - -let - # Initial variable configuration - varConf = { - n = [ - { x = 192 / 4; y = 192 / 4; z = 16 * 192; } - ]; - nprocs = [ - # { x = 2; y = 1; z = 1; } - # { x = 4; y = 1; z = 1; } - # { x = 8; y = 1; z = 1; } - # { x = 16; y = 1; z = 1; } - # { x = 32; y = 1; z = 1; } - - # { x = 1; y = 2; z = 1; } - # { x = 1; y = 4; z = 1; } - # { x = 1; y = 8; z = 1; } - # { x = 1; y = 16; z = 1; } - # { x = 1; y = 32; z = 1; } - - { x = 1; y = 1; z = 2; } - { x = 1; y = 1; z = 4; } - { x = 1; y = 1; z = 8; } - { x = 1; y = 1; z = 16; } - { x = 1; y = 1; z = 32; } - - ]; - # nblocks = [ 12 24 48 96 192 384 768 1536 ]; - nblocks = [ 24 48 96 192 384 ]; - ncommblocks = [ 1 ]; - # nodes = [ 1 ]; - # nodes = [ 1 2 4 8 16 ]; - }; - - # Generate the complete configuration for each unit - genConf = c: targetMachine.config // rec { - expName = "hpcg.oss"; - unitName = "${expName}.nb${toString nblocks}"; - - inherit (targetMachine.config) hw; - - # hpcg options - inherit (c) nprocs nblocks ncommblocks; - - n = { - x = c.n.x / 
nprocs.x; - y = c.n.y / nprocs.y; - z = c.n.z / nprocs.z; - }; - - gitBranch = "garlic/tampi+isend+oss+task"; - - # Repeat the execution of each unit 30 times - loops = 10; - - disableAspectRatio = true; - - # Resources - qos = "debug"; - ntasksPerNode = hw.socketsPerNode; - time = "02:00:00"; - # task in one socket - cpusPerTask = hw.cpusPerSocket; - nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode; - jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}"; - }; - - # Compute the array of configurations - configs = stdexp.buildConfigs { - inherit varConf genConf; - }; - - input = genInput configs; - - exec = {nextStage, conf, ...}: stages.exec { - inherit nextStage; - argv = [ - "--nx=${toString conf.n.x}" - "--ny=${toString conf.n.y}" - "--nz=${toString conf.n.z}" - # Distribute all processes in X axis - "--npx=${toString conf.nprocs.x}" - "--npy=${toString conf.nprocs.y}" - "--npz=${toString conf.nprocs.z}" - "--nblocks=${toString conf.nblocks}" - "--ncomms=${toString conf.ncommblocks}" - # The input symlink is generated by the input stage, which is generated by - # the genInput function. - "--load=input" - # Disable HPCG Aspect Ratio to run any mpi layout - ] ++ optional (conf.disableAspectRatio) "--no-ar=1"; - }; - - program = {nextStage, conf, ...}: bsc.apps.hpcg.override { - inherit (conf) gitBranch; - }; - - pipeline = stdexp.stdPipeline ++ [ input exec program ]; - -in - - stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/hpcg/ss.nix b/garlic/exp/hpcg/ss.nix new file mode 100644 index 0000000..164681f --- /dev/null +++ b/garlic/exp/hpcg/ss.nix @@ -0,0 +1,68 @@ +{ + stdenv +, stdexp +, bsc +, targetMachine +, stages +, garlicTools +, callPackage +, enableExtended ? 
false +}: + +with stdenv.lib; +with garlicTools; + +let + common = callPackage ./common.nix { }; + + inherit (common) pipeline getSizePerTask; + + # Initial variable configuration + varConf = { + nodes = range2 1 16; + blocksPerCpu = if (enableExtended) + then range2 1 8 + else [ 4 ]; + gitBranch = [ + "garlic/tampi+isend+oss+task" + ]; + }; + + # Generate the complete configuration for each unit + genConf = c: targetMachine.config // rec { + expName = "hpcg-ss"; + unitName = "${expName}" + + "-nodes${toString nodes}" + + "-bpc${toString blocksPerCpu}"; + + inherit (targetMachine.config) hw; + + # hpcg options + inherit (c) nodes blocksPerCpu gitBranch; + totalTasks = ntasksPerNode * nodes; + sizePerCpu = { x=2; y=2; z=128 / totalTasks; }; + sizePerTask = getSizePerTask cpusPerTask sizePerCpu; + nprocs = { x=1; y=1; z=totalTasks; }; + nblocks = blocksPerCpu * cpusPerTask; + ncomms = 1; + disableAspectRatio = true; + + # Repeat the execution of each unit several times + loops = 10; + + # Resources + qos = "debug"; + time = "02:00:00"; + cpusPerTask = hw.cpusPerSocket; + ntasksPerNode = hw.socketsPerNode; + jobName = unitName; + }; + + # Compute the array of configurations + configs = stdexp.buildConfigs { + inherit varConf genConf; + }; + +in + + stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/index.nix b/garlic/exp/index.nix index a33ca36..dc46b76 100644 --- a/garlic/exp/index.nix +++ b/garlic/exp/index.nix @@ -65,9 +65,9 @@ inherit genInput; }; - ossSlicesStrongscaling = callPackage ./hpcg/oss.slices.strongscaling.nix { - inherit genInput; - }; + ss = callPackage ./hpcg/ss.nix { }; + + big.ss = ss.override { enableExtended = true; }; }; heat = rec {