nbody: add ctf tests

Rodrigo Arias 2020-12-03 13:20:40 +01:00
parent b8a1ea3f72
commit bdaadd4ef7
3 changed files with 134 additions and 104 deletions

View File garlic/exp/nbody/nblocks.nix

@@ -0,0 +1,110 @@
{ stdenv
, stdexp
, bsc
, targetMachine
, stages
, garlicTools
# Options for the experiment
, enableJemalloc ? false
, enableCTF ? false
# Number of cases tested
, steps ? 7
# nbody iterations
, timesteps ? 10
# nbody total number of particles
, particles ? null
, gitBranch ? "garlic/tampi+send+oss+task"
, loops ? 10
, nblocks0 ? null
}:
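
# Each argument above can be tuned per variant with .override on the
# resulting experiment; the overlay hunks later in this commit do exactly
# that, e.g.:
#
#   small = baseline.override { particles = 12 * 4096; };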

with stdenv.lib;
with garlicTools;

let
  defaultOpt = var: def: if (var != null) then var else def;

  machineConfig = targetMachine.config;
  inherit (machineConfig) hw;

  # Initial variable configuration
  varConf = with bsc; {
    # Create a list with values 2^n with n from 0 to (steps - 1) inclusive
    i = expRange 2 0 (steps - 1);
  };
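
  # With the default steps = 7 this yields i = [ 1 2 4 8 16 32 64 ]. As a
  # minimal sketch, expRange (a garlicTools helper) presumably behaves
  # like the following reimplementation (an assumption, not its actual
  # source):
  #
  #   expRange = base: from: to:
  #     map (n: foldl (a: _: a * base) 1 (range 1 n)) (range from to);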

  # Generate the complete configuration for each unit
  genConf = var: fix (self: var // targetMachine.config // {
    expName = "nbody-nblocks";
    unitName = "${self.expName}${toString self.nblocks}";
    inherit (machineConfig) hw;

    # nbody options
    particles = defaultOpt particles (4096 * self.hw.cpusPerSocket);
    nblocks0 = defaultOpt nblocks0 (self.hw.cpusPerSocket / 2);
    # The number of blocks is then computed from the multiplier "i" and
    # the initial number of blocks "nblocks0"
    nblocks = self.i * self.nblocks0;
    totalTasks = self.ntasksPerNode * self.nodes;
    particlesPerTask = self.particles / self.totalTasks;
    blocksize = self.particlesPerTask / self.nblocks;
    cc = bsc.icc;
    mpi = bsc.impi;
    cflags = "-g";
    inherit timesteps gitBranch enableJemalloc enableCTF loops;

    # Resources
    qos = "debug";
    cpusPerTask = self.hw.cpusPerSocket;
    ntasksPerNode = self.hw.socketsPerNode;
    nodes = 1;
    jobName = self.unitName;
  });
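
  # Worked example, assuming a node with hw.cpusPerSocket = 24 and
  # hw.socketsPerNode = 2 (machine-dependent values, not fixed here):
  # particles defaults to 4096 * 24 = 98304 and nblocks0 to 24 / 2 = 12,
  # so the unit with i = 4 gets nblocks = 48, totalTasks = 2 * 1 = 2,
  # particlesPerTask = 98304 / 2 = 49152 and blocksize = 49152 / 48 = 1024.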

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  perf = {nextStage, conf, ...}: with conf; stages.perf {
    inherit nextStage;
    perfOptions = "record --call-graph dwarf -o \\$\\$.perf";
  };

  ctf = {nextStage, conf, ...}: with conf; stages.exec {
    inherit nextStage;
    env = optionalString (conf.enableCTF) ''
      export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf,\
      instrument.ctf.conversor.enabled=false"
    '';
  };
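
  # With enableCTF set, the launched unit runs with the
  # NANOS6_CONFIG_OVERRIDE export above: "version.instrument=ctf" selects
  # the CTF instrumentation variant of Nanos6, and
  # "instrument.ctf.conversor.enabled=false" skips the trace conversion
  # step so the raw CTF trace is kept. (Key semantics are Nanos6's own;
  # only the override string itself comes from this file.)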

  exec = {nextStage, conf, ...}: with conf; stages.exec {
    inherit nextStage;
    argv = [ "-t" timesteps "-p" particles ];
  };

  program = {nextStage, conf, ...}: with conf;
    let
      /* These changes are propagated to all dependencies. For example,
         when changing nanos6+jemalloc, we will get tampi built with
         nanos6+jemalloc as well. */
      customPkgs = bsc.extend (self: super: {
        mpi = conf.mpi;
        nanos6 = self.nanos6.override { inherit enableJemalloc; };
      });
    in
      customPkgs.apps.nbody.override ({
        inherit cc blocksize mpi gitBranch cflags;
      });
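
  # The extend call builds a package set in which every consumer of
  # nanos6 sees the overridden one, because dependencies are resolved
  # through the extended fixpoint. A minimal sketch of the effect (tampi
  # stands for any package in bsc that depends on nanos6; its wiring is
  # assumed, not shown in this commit):
  #
  #   pkgs = bsc.extend (self: super: {
  #     nanos6 = self.nanos6.override { enableJemalloc = true; };
  #   });
  #   # pkgs.tampi is now built against the jemalloc-enabled nanos6.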

  pipeline = stdexp.stdPipeline ++ [ ctf exec program ];

in
  stdexp.genExperiment { inherit configs pipeline; }
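
With the defaults above, buildConfigs expands the seven values of i into
seven units named by unitName. Assuming hw.cpusPerSocket = 24 (so
nblocks0 = 12, as in the worked example), a sketch of the generated names:

    map (i: "nbody-nblocks${toString (i * 12)}") [ 1 2 4 8 16 32 64 ]
    # => [ "nbody-nblocks12" "nbody-nblocks24" ... "nbody-nblocks768" ]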

View File garlic/exp/nbody/tampi.nix

@@ -1,101 +0,0 @@
{ stdenv
, stdexp
, bsc
, targetMachine
, stages
, garlicTools
, enableJemalloc ? false
, particles ? null
}:

with stdenv.lib;
with garlicTools;

let
  machineConfig = targetMachine.config;
  inherit (machineConfig) hw;

  # Number of cases tested
  steps = 7;

  # First value for nblocks: we want to begin by using 1/2 blocks/cpu so we
  # set the first number of blocks to cpusPerSocket / 2
  nblocks0 = hw.cpusPerSocket / 2;

  # Initial variable configuration
  varConf = with bsc; {
    # Create a list with values 2^n with n from 0 to (steps - 1) inclusive
    i = expRange 2 0 (steps - 1);
  };

  # Set here the particles, so we don't have an infinite recursion in the
  # genConf attrset.
  _particles = if (particles != null)
    then particles
    else 4096 * hw.cpusPerSocket;

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "nbody-nblocks";
    unitName = "${expName}${toString nblocks}";
    inherit (machineConfig) hw;

    # nbody options
    particles = _particles;
    timesteps = 10;
    nblocks = c.i * nblocks0;
    totalTasks = ntasksPerNode * nodes;
    particlesPerTask = particles / totalTasks;
    blocksize = particlesPerTask / nblocks;
    cc = icc;
    mpi = impi;
    gitBranch = "garlic/tampi+send+oss+task";
    cflags = "-g";
    inherit enableJemalloc;

    # Repeat the execution of each unit 10 times
    loops = 10;

    # Resources
    qos = "debug";
    cpusPerTask = hw.cpusPerSocket;
    ntasksPerNode = hw.socketsPerNode;
    nodes = 1;
    jobName = unitName;
  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  perf = {nextStage, conf, ...}: with conf; stages.perf {
    inherit nextStage;
    perfOptions = "record --call-graph dwarf -o \\$\\$.perf";
  };

  exec = {nextStage, conf, ...}: with conf; stages.exec {
    inherit nextStage;
    argv = [ "-t" timesteps "-p" particles ];
  };

  program = {nextStage, conf, ...}: with conf;
    let
      customPkgs = stdexp.replaceMpi conf.mpi;
    in
      customPkgs.apps.nbody.override ({
        inherit cc blocksize mpi gitBranch cflags;
      } // optionalAttrs enableJemalloc {
        mcxx = bsc.mcxx.override {
          nanos6 = bsc.nanos6Jemalloc;
        };
      });

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in
  stdexp.genExperiment { inherit configs pipeline; }

View File

@@ -372,14 +372,30 @@ let
   # Experiments
   exp = {
     nbody = rec {
-      tampi = callPackage ./garlic/exp/nbody/tampi.nix { };
+      baseline = callPackage ./garlic/exp/nbody/nblocks.nix { };
       # Experiment variants
-      baseline = tampi;
-      small = baseline.override { particles = 12 * 4096; };
+      small = baseline.override {
+        particles = 12 * 4096;
+      };
 
       # TODO: Update freeCpu using a non-standard pipeline
       #freeCpu = baseline.override { freeCpu = true; };
       jemalloc = baseline.override { enableJemalloc = true; };
+
+      # Some experiments with traces
+      trace = {
+        # Only one unit repeated 30 times
+        baseline = small.override {
+          enableCTF = true;
+          loops = 30;
+          steps = 1;
+        };
+
+        # Same but with jemalloc enabled
+        jemalloc = trace.baseline.override {
+          enableJemalloc = true;
+        };
+      };
     };
 
     saiph = {
@@ -450,6 +466,7 @@ let
       small = merge [ small ];
       jemalloc = merge [ baseline jemalloc ];
       #freeCpu = merge [ baseline freeCpu ];
+      ctf = merge [ ctf ];
     };
 
     hpcg = with exp.hpcg; {
@@ -490,6 +507,10 @@ let
       # script = ./garlic/fig/nbody/freeCpu.R;
       # dataset = ds.nbody.freeCpu;
       #};
+      ctf = pp.rPlot {
+        script = ./garlic/fig/nbody/baseline.R;
+        dataset = ds.nbody.ctf;
+      };
     };
 
     hpcg = {
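
The new pieces connect through the exp -> ds -> fig chain already used by
this overlay: the exp.nbody.trace units run the CTF-instrumented binary,
the ds.nbody.ctf dataset merges their results, and fig.nbody.ctf plots
that dataset with the existing baseline.R script. As a usage sketch (the
attribute names come from the hunks above, but the build entry point is
an assumption, not shown in this diff):

    nix-build -A fig.nbody.ctf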