From 9020f8776525100e9689bb13b594e8e5f6f1d8a9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 9 Oct 2020 17:20:50 +0200 Subject: [PATCH] Simplify saiph numcomm experiment --- garlic/exp/saiph/numcomm.nix | 197 +++++++---------------------------- 1 file changed, 35 insertions(+), 162 deletions(-) diff --git a/garlic/exp/saiph/numcomm.nix b/garlic/exp/saiph/numcomm.nix index 7da9d9c..f75b173 100644 --- a/garlic/exp/saiph/numcomm.nix +++ b/garlic/exp/saiph/numcomm.nix @@ -1,188 +1,61 @@ { stdenv -, nixpkgs -, pkgs -, genApp -, genConfigs -, runWrappers +, stdexp +, bsc +, targetMachine +, stages }: with stdenv.lib; let - # Set variable configuration for the experiment - varConfig = { - devMode = [ true ]; - numComm = [ 1 ]; + # Configurations for each unit (using the cartesian product) + confUnit = with bsc; { + numComm = [ 1 2 ]; }; - # Common configuration - common = { - # Compile time nbody config + # Configuration for the complete experiment + confExperiment = with bsc; { + # saiph options + devMode = false; + mpi = impi; gitBranch = "garlic/tampi+isend+oss+task+simd"; - mpi = pkgs.bsc.impi; + + # Repeat the execution of each unit 100 times + loops = 100; # Resources - ntasksPerNode = "2"; - nodes = "1"; - - # Stage configuration - enableSbatch = true; - enableControl = true; - enableExtrae = false; - enablePerf = false; - - # MN4 path - nixPrefix = "/gpfs/projects/bsc15/nix"; + ntasksPerNode = 2; + nodes = 1; + cpuBind = "sockets,verbose"; }; - # Compute the cartesian product of all configurations - configs = map (conf: conf // common) (genConfigs varConfig); - - stageProgram = stage: - if stage ? programPath - then "${stage}${stage.programPath}" else "${stage}"; - - w = runWrappers; - - sbatch = {stage, conf, ...}: with conf; w.sbatch ( - # Allow a user to define a custom reservation for the job in MareNostrum4, - # by setting the garlic.sbatch.reservation attribute in the - # ~/.config/nixpkgs/config.nix file. 
If the attribute is not set, no - # reservation is used. The user reservation may be overwritten by the - # experiment, if the reservation is set like with nodes or ntasksPerNode. - optionalAttrs (pkgs.config ? garlic.sbatch.reservation) { - inherit (pkgs.config.garlic.sbatch) reservation; - } // { - program = stageProgram stage; - exclusive = true; - time = "02:00:00"; - qos = "debug"; - jobName = "saiph"; - inherit nixPrefix nodes ntasksPerNode; - } - ); - - control = {stage, conf, ...}: with conf; w.control { - program = stageProgram stage; - loops = 100; + # Compute the array of configurations + configs = stdexp.buildConfigs { + var = confUnit; + fixed = targetMachine.config // confExperiment; }; - srun = {stage, conf, ...}: with conf; w.srun { - program = stageProgram stage; - srunOptions = "--cpu-bind=verbose,sockets"; - inherit nixPrefix; - }; - - statspy = {stage, conf, ...}: with conf; w.statspy { - program = stageProgram stage; - }; - - perf = {stage, conf, ...}: with conf; w.perf { - program = stageProgram stage; - perfArgs = "sched record -a"; - }; - - nixsetup = {stage, conf, ...}: with conf; w.nixsetup { - program = stageProgram stage; - nixsetup = "${nixPrefix}/bin/nix-setup"; - }; - - extrae = {stage, conf, ...}: - let - # We set the mpi implementation to the one specified in the conf, so all - # packages in bsc will use that one. 
- customPkgs = genPkgs (self: super: { - bsc = super.bsc // { mpi = conf.mpi; }; - }); - - extrae = customPkgs.bsc.extrae; - in - w.extrae { - program = stageProgram stage; - extrae = extrae; - traceLib = "nanosmpi"; # mpi -> libtracempi.so - configFile = ./extrae.xml; - }; - - bscOverlay = import ../../../overlay.nix; - - genPkgs = newOverlay: nixpkgs { - overlays = [ - bscOverlay - newOverlay - ]; - }; - - # Print the environment to ensure we don't get anything nasty - envRecord = {stage, conf, ...}: w.envRecord { - program = stageProgram stage; - }; - - broom = {stage, conf, ...}: w.broom { - program = stageProgram stage; - }; - # We may be able to use overlays by invoking the fix function directly, but we - # have to get the definition of the bsc packages and the garlic ones as - # overlays. - - argv = {stage, conf, ...}: with conf; w.argv { - program = stageProgram stage; + exec = {nextStage, conf, ...}: with conf; stages.exec { + inherit nextStage; env = '' export OMP_NUM_THREADS=24 export NANOS6_REPORT_PREFIX="#" export I_MPI_THREAD_SPLIT=1 - export ASAN_SYMBOLIZER_PATH=${pkgs.bsc.clangOmpss2Unwrapped}/bin/llvm-symbolizer - '' - + optionalString enableExtrae - ''export NANOS6=extrae - export NANOS6_EXTRAE_AS_THREADS=0 + export ASAN_SYMBOLIZER_PATH=${bsc.clangOmpss2Unwrapped}/bin/llvm-symbolizer ''; }; - saiphFn = {stage, conf, ...}: with conf; - let - # We set the mpi implementation to the one specified in the conf, so all - # packages in bsc will use that one. 
- customPkgs = genPkgs (self: super: { - bsc = super.bsc // { mpi = conf.mpi; }; - }); - in - customPkgs.bsc.garlic.saiph.override { - inherit devMode numComm mpi gitBranch; - }; + program = {nextStage, conf, ...}: with conf; + let + customPkgs = stdexp.replaceMpi conf.mpi; + in + customPkgs.apps.saiph.override { + inherit devMode numComm mpi gitBranch; + }; - stages = with common; [] - # Cleans ALL environment variables - ++ [ broom ] - - # Use sbatch to request resources first - ++ optionals enableSbatch [ sbatch nixsetup ] - - # Record the current env vars set by SLURM to verify we don't have something - # nasty (like sourcing .bashrc). Take a look at #26 - ++ [ envRecord ] - - # Repeats the next stages N=30 times - ++ optional enableControl control - - # Executes srun to launch the program in the requested nodes, and - # immediately after enters the nix environment again, as slurmstepd launches - # the next stages from outside the namespace. - ++ [ srun nixsetup ] - - # Intrumentation with extrae - ++ optional enableExtrae extrae - - # Optionally profile the next stages with perf - ++ optional enablePerf perf - - # Execute the saiph example app - ++ [ argv saiphFn ]; - - # List of actual programs to be executed - jobs = map (conf: w.stagen { inherit conf stages; }) configs; + pipeline = stdexp.stdStages ++ [ exec program ]; in - # We simply run each program one after another - w.launch jobs + + stdexp.genExperiment { inherit configs pipeline; }