diff --git a/garlic/exp/nbody/nblocks.nix b/garlic/exp/nbody/nblocks.nix
new file mode 100644
index 0000000..ce4a025
--- /dev/null
+++ b/garlic/exp/nbody/nblocks.nix
@@ -0,0 +1,122 @@
+{
+  stdenv
+, stdexp
+, bsc
+, targetMachine
+, stages
+, garlicTools
+
+# Options for the experiment
+, enableJemalloc ? false
+, enableCTF ? false
+# Number of cases tested
+, steps ? 7
+# nbody iterations
+, timesteps ? 10
+# nbody total number of particles
+, particles ? null
+, gitBranch ? "garlic/tampi+send+oss+task"
+, loops ? 10
+, nblocks0 ? null
+}:
+
+with stdenv.lib;
+with garlicTools;
+
+let
+
+  # Return "var" unless it is null, in which case return the default "def"
+  defaultOpt = var: def: if (var != null) then var else def;
+
+  machineConfig = targetMachine.config;
+  inherit (machineConfig) hw;
+
+  # Initial variable configuration
+  varConf = with bsc; {
+    # Create a list with values 2^n with n from 0 to (steps - 1) inclusive
+    i = expRange 2 0 (steps - 1);
+  };
+
+  # Generate the complete configuration for each unit
+  genConf = var: fix (self: var // targetMachine.config // {
+    expName = "nbody-nblocks";
+    unitName = "${self.expName}${toString self.nblocks}";
+
+    inherit (machineConfig) hw;
+
+    # nbody options
+    particles = defaultOpt particles (4096 * self.hw.cpusPerSocket);
+    nblocks0 = defaultOpt nblocks0 (self.hw.cpusPerSocket / 2);
+    # The number of blocks is then computed from the multiplier "i" and
+    # the initial number of blocks "nblocks0"
+    nblocks = self.i * self.nblocks0;
+
+    totalTasks = self.ntasksPerNode * self.nodes;
+    particlesPerTask = self.particles / self.totalTasks;
+    blocksize = self.particlesPerTask / self.nblocks;
+    cc = bsc.icc;
+    mpi = bsc.impi;
+    cflags = "-g";
+    inherit timesteps gitBranch enableJemalloc enableCTF loops;
+
+    # Resources
+    qos = "debug";
+    cpusPerTask = self.hw.cpusPerSocket;
+    ntasksPerNode = self.hw.socketsPerNode;
+    nodes = 1;
+    jobName = self.unitName;
+  });
+
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  # NOTE: this stage is defined but not listed in the pipeline below
+  perf = {nextStage, conf, ...}: with conf; stages.perf {
+    inherit nextStage;
+    perfOptions = "record --call-graph dwarf -o \\$\\$.perf";
+  };
+
+  # Extra exec stage: when enableCTF is set, export the Nanos6 override that
+  # selects the CTF instrumentation and disables the CTF conversor. The
+  # continuation line keeps the same indentation as the export line, as any
+  # extra leading blanks would become part of the quoted value
+  ctf = {nextStage, conf, ...}: with conf; stages.exec {
+    inherit nextStage;
+    env = optionalString (conf.enableCTF) ''
+      export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf,\
+      instrument.ctf.conversor.enabled=false"
+    '';
+  };
+
+  # The values are read through "conf" explicitly: "with conf" does not shadow
+  # the arguments of this file, so a bare "particles" would resolve to the
+  # (possibly null) option instead of the value computed in genConf
+  exec = {nextStage, conf, ...}: stages.exec {
+    inherit nextStage;
+    argv = [ "-t" conf.timesteps "-p" conf.particles ];
+  };
+
+  program = {nextStage, conf, ...}: with conf;
+  let
+    /* These changes are propagated to all dependencies. For example,
+    when changing nanos6+jemalloc, we will get tampi built with
+    nanos6+jemalloc as well. */
+    customPkgs = bsc.extend (self: super: {
+      mpi = conf.mpi;
+      # Override the previous definition (super): referring to self.nanos6
+      # here would point back to this very attribute (assuming bsc.extend
+      # applies the usual self/super overlay semantics)
+      nanos6 = super.nanos6.override { inherit enableJemalloc; };
+    });
+  in
+  customPkgs.apps.nbody.override ({
+    inherit cc blocksize mpi gitBranch cflags;
+  });
+
+  pipeline = stdexp.stdPipeline ++ [ ctf exec program ];
+
+in
+
+  stdexp.genExperiment { inherit configs pipeline; }
diff --git a/garlic/exp/nbody/tampi.nix b/garlic/exp/nbody/tampi.nix
deleted file mode 100644
index 09516be..0000000
--- a/garlic/exp/nbody/tampi.nix
+++ /dev/null
@@ -1,101 +0,0 @@
-{
-  stdenv
-, stdexp
-, bsc
-, targetMachine
-, stages
-, garlicTools
-, enableJemalloc ? false
-, particles ? null
-}:
-
-with stdenv.lib;
-with garlicTools;
-
-let
-
-  machineConfig = targetMachine.config;
-  inherit (machineConfig) hw;
-
-  # Number of cases tested
-  steps = 7;
-
-  # First value for nblocks: we want to begin by using 1/2 blocks/cpu so we set
-  # the first number of blocks to cpusPerSocket / 2
-  nblocks0 = hw.cpusPerSocket / 2;
-
-  # Initial variable configuration
-  varConf = with bsc; {
-    # Create a list with values 2^n with n from 0 to (steps - 1) inclusive
-    i = expRange 2 0 (steps - 1);
-  };
-
-  # Set here the particles, so we don't have an infinite recursion in the
-  # genConf attrset.
-  _particles = if (particles != null)
-    then particles
-    else 4096 * hw.cpusPerSocket;
-
-  # Generate the complete configuration for each unit
-  genConf = with bsc; c: targetMachine.config // rec {
-    expName = "nbody-nblocks";
-    unitName = "${expName}${toString nblocks}";
-
-    inherit (machineConfig) hw;
-    # nbody options
-    particles = _particles;
-    timesteps = 10;
-    nblocks = c.i * nblocks0;
-    totalTasks = ntasksPerNode * nodes;
-    particlesPerTask = particles / totalTasks;
-    blocksize = particlesPerTask / nblocks;
-    cc = icc;
-    mpi = impi;
-    gitBranch = "garlic/tampi+send+oss+task";
-    cflags = "-g";
-    inherit enableJemalloc;
-
-    # Repeat the execution of each unit 10 times
-    loops = 10;
-
-    # Resources
-    qos = "debug";
-    cpusPerTask = hw.cpusPerSocket;
-    ntasksPerNode = hw.socketsPerNode;
-    nodes = 1;
-
-    jobName = unitName;
-  };
-
-  # Compute the array of configurations
-  configs = stdexp.buildConfigs {
-    inherit varConf genConf;
-  };
-
-  perf = {nextStage, conf, ...}: with conf; stages.perf {
-    inherit nextStage;
-    perfOptions = "record --call-graph dwarf -o \\$\\$.perf";
-  };
-
-  exec = {nextStage, conf, ...}: with conf; stages.exec {
-    inherit nextStage;
-    argv = [ "-t" timesteps "-p" particles ];
-  };
-
-  program = {nextStage, conf, ...}: with conf;
-  let
-    customPkgs = stdexp.replaceMpi conf.mpi;
-  in
-  customPkgs.apps.nbody.override ({
-    inherit cc blocksize mpi gitBranch cflags;
-  } // optionalAttrs enableJemalloc {
-    mcxx = bsc.mcxx.override {
-      nanos6 = bsc.nanos6Jemalloc;
-    };
-  });
-
-  pipeline = stdexp.stdPipeline ++ [ exec program ];
-
-in
-
-  stdexp.genExperiment { inherit configs pipeline; }
diff --git a/overlay.nix b/overlay.nix
index cbcabf5..77e5ab8 100644
--- a/overlay.nix
+++ b/overlay.nix
@@ -372,14 +372,30 @@ let
       # Experiments
       exp = {
         nbody = rec {
-          tampi = callPackage ./garlic/exp/nbody/tampi.nix { };
+          baseline = callPackage ./garlic/exp/nbody/nblocks.nix { };
 
           # Experiment variants
-          baseline = tampi;
-          small = baseline.override { particles = 12 * 4096; };
+          small = baseline.override {
+            particles = 12 * 4096;
+          };
           # TODO: Update freeCpu using a non-standard pipeline
           #freeCpu = baseline.override { freeCpu = true; };
           jemalloc = baseline.override { enableJemalloc = true; };
+
+          # Some experiments with traces
+          trace = {
+            # Only one unit repeated 30 times
+            baseline = small.override {
+              enableCTF = true;
+              loops = 30;
+              steps = 1;
+            };
+
+            # Same but with jemalloc enabled
+            jemalloc = trace.baseline.override {
+              enableJemalloc = true;
+            };
+          };
         };
 
         saiph = {
@@ -450,6 +466,8 @@ let
           small = merge [ small ];
           jemalloc = merge [ baseline jemalloc ];
           #freeCpu = merge [ baseline freeCpu ];
+          # exp.nbody defines no "ctf"; the CTF runs are under exp.nbody.trace
+          ctf = merge [ exp.nbody.trace.baseline ];
         };
 
         hpcg = with exp.hpcg; {
@@ -490,6 +508,10 @@ let
           #  script = ./garlic/fig/nbody/freeCpu.R;
           #  dataset = ds.nbody.freeCpu;
           #};
+          ctf = pp.rPlot {
+            script = ./garlic/fig/nbody/baseline.R;
+            dataset = ds.nbody.ctf;
+          };
         };
 
         hpcg = {