From cc101ad1d394beb5e6334f82f63a178b55ee4027 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 21 Sep 2020 17:30:24 +0200 Subject: [PATCH] Add saiph experiments --- bsc/dummy/default.nix | 3 +- bsc/llvm-ompss2/clang.nix | 1 - bsc/llvm-ompss2/default.nix | 4 +- bsc/mcxx/default.nix | 13 +- bsc/mcxx/rarias.nix | 1 + garlic/exp/saiph/extrae.xml | 211 +++++++++++++++++++++++++ garlic/exp/saiph/mpi.nix | 88 +++++++++++ garlic/exp/saiph/numcomm.nix | 136 ++++++++++++++++ garlic/exp/saiph/tampi.nix | 165 +++++++++++++++++++ {bsc/apps => garlic}/saiph/default.nix | 26 ++- garlic/stages/sbatch.nix | 2 + overlay.nix | 30 +++- 12 files changed, 655 insertions(+), 25 deletions(-) create mode 100644 garlic/exp/saiph/extrae.xml create mode 100644 garlic/exp/saiph/mpi.nix create mode 100644 garlic/exp/saiph/numcomm.nix create mode 100644 garlic/exp/saiph/tampi.nix rename {bsc/apps => garlic}/saiph/default.nix (55%) diff --git a/bsc/dummy/default.nix b/bsc/dummy/default.nix index b063516..be4b21c 100644 --- a/bsc/dummy/default.nix +++ b/bsc/dummy/default.nix @@ -14,7 +14,8 @@ stdenv.mkDerivation rec { cat > $out/bin/dummy <> $out/nix-support/cc-cflags echo "-B${gcc.cc}/lib/gcc/${targetConfig}/${gcc.version}" >> $out/nix-support/cc-cflags diff --git a/bsc/mcxx/default.nix b/bsc/mcxx/default.nix index 50ba72c..4b05ca3 100644 --- a/bsc/mcxx/default.nix +++ b/bsc/mcxx/default.nix @@ -1,5 +1,4 @@ { stdenv -, fetchgit , autoreconfHook , nanos6 , gperf @@ -13,9 +12,8 @@ }: stdenv.mkDerivation rec { - name = "mcxx"; - #version attribute ignored when using fetchgit: - #version = "2.2.0-70a299cf"; + pname = "mcxx"; + version = "${src.shortRev}"; passthru = { CC = "mcc"; @@ -23,10 +21,9 @@ stdenv.mkDerivation rec { }; # Use patched Extrae version - src = fetchgit { + src = builtins.fetchGit { url = "https://github.com/bsc-pm/mcxx"; - rev = "70a299cfeb1f96735e6b9835aee946451f1913b2"; - sha256 = "1n8y0h47jm2ll67xbz930372xkl9647z12lfwz2472j3y86yxpmw"; + ref = "master"; }; 
enableParallelBuilding = true; @@ -55,5 +52,7 @@ stdenv.mkDerivation rec { configureFlags = [ "--enable-ompss-2" "--with-nanos6=${nanos6}" +# Fails with "memory exhausted" with bison 3.7.1 +# "--enable-bison-regeneration" ]; } diff --git a/bsc/mcxx/rarias.nix b/bsc/mcxx/rarias.nix index 47a5f6c..d44db2a 100644 --- a/bsc/mcxx/rarias.nix +++ b/bsc/mcxx/rarias.nix @@ -49,6 +49,7 @@ stdenv.mkDerivation rec { configureFlags = [ "--enable-ompss-2" "--with-nanos6=${nanos6}" +# "--enable-bison-regeneration" ]; # Regenerate ia32 builtins to add the ones for gcc9 diff --git a/garlic/exp/saiph/extrae.xml b/garlic/exp/saiph/extrae.xml new file mode 100644 index 0000000..b9af29b --- /dev/null +++ b/garlic/exp/saiph/extrae.xml @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + 1-3 + + 1-5 + + 1-3 + + 1-3 + + 1-3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PAPI_TOT_INS,PAPI_TOT_CYC + + + + + + + + + + + + + + + + + TRACE + + 5 + + /scratch + + /gpfs/scratch/bsc41/bsc41273 + + + + + + 5000000 + + + + + + + + /gpfs/scratch/bsc41/bsc41273/control + + + + + + + 10M + + + + + + + + + + + 500u + + + + + + + + + + + + + + + + + + + + diff --git a/garlic/exp/saiph/mpi.nix b/garlic/exp/saiph/mpi.nix new file mode 100644 index 0000000..a618f85 --- /dev/null +++ b/garlic/exp/saiph/mpi.nix @@ -0,0 +1,88 @@ +{ + bsc +, nbody +, genApp +, genConfigs + +# Wrappers +, launchWrapper +, sbatchWrapper +, srunWrapper +, argvWrapper +, controlWrapper +, nixsetupWrapper +}: + +let + # Set the configuration for the experiment + config = { + cc = [ bsc.icc ]; + blocksize = [ 2048 ]; + mpi = [ bsc.impi bsc.openmpi bsc.mpich ]; + }; + + extraConfig = { + particles = 32*1024; + timesteps = 10; + ntasksPerNode = 2; + nodes = 1; + time = "00:10:00"; + qos = "debug"; + #mpi = bsc.impi; + #mpi = bsc.openmpi; + gitBranch = "garlic/mpi+send"; + gitURL = "ssh://git@bscpm02.bsc.es/garlic/apps/nbody.git"; + }; + + # Compute the cartesian product of all configurations + configs = map (conf: 
conf // extraConfig) (genConfigs config); + + sbatch = conf: app: sbatchWrapper { + app = app; + nixPrefix = "/gpfs/projects/bsc15/nix"; + exclusive = false; + ntasksPerNode = "${toString conf.ntasksPerNode}"; + nodes = "${toString conf.nodes}"; + time = conf.time; + qos = conf.qos; + chdirPrefix = "/home/bsc15/bsc15557/bsc-nixpkgs/out"; + }; + + srun = app: srunWrapper { + app = app; + nixPrefix = "/gpfs/projects/bsc15/nix"; + }; + + argv = conf: app: + with conf; + argvWrapper { + app = app; + argv = ''(-t ${toString timesteps} -p ${toString particles})''; + env = '' + export I_MPI_THREAD_SPLIT=1 + ''; + }; + + nbodyFn = conf: + with conf; + nbody.override { inherit cc mpi blocksize gitBranch gitURL; }; + + pipeline = conf: + sbatch conf ( + srun ( + nixsetupWrapper ( + argv conf ( + nbodyFn conf + ) + ) + ) + ) + ; + + # Ideally it should look like this: + #pipeline = sbatch nixsetup control argv nbodyFn; + + jobs = map pipeline configs; + +in + launchWrapper jobs diff --git a/garlic/exp/saiph/numcomm.nix b/garlic/exp/saiph/numcomm.nix new file mode 100644 index 0000000..0bf21c5 --- /dev/null +++ b/garlic/exp/saiph/numcomm.nix @@ -0,0 +1,136 @@ +{ + stdenv +, nixpkgs +, pkgs +, genApp +, genConfigs +, runWrappers +}: + +with stdenv.lib; + +let + # Set variable configuration for the experiment + varConfig = { + numComm = [ 1 ]; + }; + + # Common configuration + common = { + # Compile time nbody config + gitBranch = "Saiph_TAMPI_OMPSS"; + mpi = pkgs.bsc.impi; + + # Resources + ntasksPerSocket = "1"; + nodes = "2"; + + # Stage configuration + enableSbatch = true; + enableControl = true; + enableExtrae = false; + enablePerf = false; + + # MN4 path + nixPrefix = "/gpfs/projects/bsc15/nix"; + }; + + # Compute the cartesian product of all configurations + configs = map (conf: conf // common) (genConfigs varConfig); + + stageProgram = stage: + if stage ? 
programPath + then "${stage}${stage.programPath}" else "${stage}"; + + w = runWrappers; + + sbatch = {stage, conf, ...}: with conf; w.sbatch { + program = stageProgram stage; + exclusive = true; + time = "02:00:00"; + qos = "debug"; + jobName = "saiph"; + inherit nixPrefix nodes ntasksPerSocket; + }; + + control = {stage, conf, ...}: with conf; w.control { + program = stageProgram stage; + }; + + srun = {stage, conf, ...}: with conf; w.srun { + program = stageProgram stage; + srunOptions = "--cpu-bind=verbose,sockets"; + inherit nixPrefix; + }; + + statspy = {stage, conf, ...}: with conf; w.statspy { + program = stageProgram stage; + }; + + perf = {stage, conf, ...}: with conf; w.perf { + program = stageProgram stage; + perfArgs = "sched record -a"; + }; + + nixsetup = {stage, conf, ...}: with conf; w.nixsetup { + program = stageProgram stage; + }; + + extrae = {stage, conf, ...}: w.extrae { + program = stageProgram stage; + traceLib = "mpi"; # mpi -> libtracempi.so + configFile = ./extrae.xml; + }; + + bscOverlay = import ../../../overlay.nix; + + genPkgs = newOverlay: nixpkgs { + overlays = [ + bscOverlay + newOverlay + ]; + }; + + # We may be able to use overlays by invoking the fix function directly, but we + # have to get the definition of the bsc packages and the garlic ones as + # overlays. + + saiphFn = {stage, conf, ...}: with conf; + let + # We set the mpi implementation to the one specified in the conf, so all + # packages in bsc will use that one. 
+ customPkgs = genPkgs (self: super: { + bsc = super.bsc // { mpi = conf.mpi; }; + }); + in + customPkgs.bsc.garlic.saiph.override { + inherit numComm mpi gitBranch; + }; + + stages = with common; [] + # Use sbatch to request resources first + ++ optional enableSbatch sbatch + + # Repeats the next stages N times + ++ optionals enableControl [ nixsetup control ] + + # Executes srun to launch the program in the requested nodes, and + # immediately after enters the nix environment again, as slurmstepd launches + # the next stages from outside the namespace. + ++ [ srun nixsetup ] + + # Instrumentation with extrae + ++ optional enableExtrae extrae + + # Optionally profile the next stages with perf + ++ optional enablePerf perf + + # Execute the saiph app + ++ [ saiphFn ]; + + # List of actual programs to be executed + jobs = map (conf: w.stagen { inherit conf stages; }) configs; + +in + # We simply run each program one after another + w.launch jobs diff --git a/garlic/exp/saiph/tampi.nix b/garlic/exp/saiph/tampi.nix new file mode 100644 index 0000000..4ec5d49 --- /dev/null +++ b/garlic/exp/saiph/tampi.nix @@ -0,0 +1,165 @@ +{ + stdenv +, nixpkgs +, pkgs +, genApp +, genConfigs +, runWrappers +}: + +with stdenv.lib; + +let + bsc = pkgs.bsc; + + # Set variable configuration for the experiment + varConfig = { + cc = [ bsc.icc ]; + mpi = [ bsc.impi bsc.openmpi ]; + blocksize = [ 1024 ]; + }; + + # Common configuration + common = { + # Compile time nbody config + gitBranch = "garlic/tampi+send+oss+task"; + + # nbody runtime options + particles = 1024*128; + timesteps = 20; + + # Resources + ntasksPerNode = "48"; + nodes = "1"; + + # Stage configuration + enableSbatch = true; + enableControl = true; + enableExtrae = false; + enablePerf = false; + enableCtf = false; + + # MN4 path + nixPrefix = "/gpfs/projects/bsc15/nix"; + }; + + # Compute the cartesian product of all configurations + configs = map (conf: conf // common) (genConfigs varConfig); + + 
stageProgram = stage: + if stage ? programPath + then "${stage}${stage.programPath}" else "${stage}"; + + w = runWrappers; + + sbatch = {stage, conf, ...}: with conf; w.sbatch { + program = stageProgram stage; + exclusive = true; + time = "02:00:00"; + qos = "debug"; + jobName = "nbody-bs"; + inherit nixPrefix nodes ntasksPerNode; + }; + + control = {stage, conf, ...}: with conf; w.control { + program = stageProgram stage; + }; + + srun = {stage, conf, ...}: with conf; w.srun { + program = stageProgram stage; + srunOptions = "--cpu-bind=verbose,rank"; + inherit nixPrefix; + }; + + statspy = {stage, conf, ...}: with conf; w.statspy { + program = stageProgram stage; + }; + + perf = {stage, conf, ...}: with conf; w.perf { + program = stageProgram stage; + perfArgs = "sched record -a"; + }; + + nixsetup = {stage, conf, ...}: with conf; w.nixsetup { + program = stageProgram stage; + }; + + extrae = {stage, conf, ...}: w.extrae { + program = stageProgram stage; + traceLib = "mpi"; # mpi -> libtracempi.so + configFile = ./extrae.xml; + }; + + ctf = {stage, conf, ...}: w.argv { + program = stageProgram stage; + env = '' + export NANOS6=ctf + export NANOS6_CTF2PRV=0 + ''; + }; + + argv = {stage, conf, ...}: w.argv { + program = stageProgram stage; + env = '' + set -e + export I_MPI_THREAD_SPLIT=1 + ''; + argv = ''( -t ${toString conf.timesteps} + -p ${toString conf.particles} )''; + }; + + bscOverlay = import ../../../overlay.nix; + + genPkgs = newOverlay: nixpkgs { + overlays = [ + bscOverlay + newOverlay + ]; + }; + + # We may be able to use overlays by invoking the fix function directly, but we + # have to get the definition of the bsc packages and the garlic ones as + # overlays. + + nbodyFn = {stage, conf, ...}: with conf; + let + # We set the mpi implementation to the one specified in the conf, so all + # packages in bsc will use that one. 
+ customPkgs = genPkgs (self: super: { + bsc = super.bsc // { mpi = conf.mpi; }; + }); + in + customPkgs.bsc.garlic.nbody.override { + inherit cc blocksize mpi gitBranch; + }; + + stages = with common; [] + # Use sbatch to request resources first + ++ optional enableSbatch sbatch + + # Repeats the next stages N times + ++ optionals enableControl [ nixsetup control ] + + # Executes srun to launch the program in the requested nodes, and + # immediately after enters the nix environment again, as slurmstepd launches + # the next stages from outside the namespace. + ++ [ srun nixsetup ] + + # Instrumentation with extrae + ++ optional enableExtrae extrae + + # Optionally profile the next stages with perf + ++ optional enablePerf perf + + # Optionally profile nanos6 with the new ctf + ++ optional enableCtf ctf + + # Execute the nbody app with the argv and env vars + ++ [ argv nbodyFn ]; + + # List of actual programs to be executed + jobs = map (conf: w.stagen { inherit conf stages; }) configs; + +in + # We simply run each program one after another + w.launch jobs diff --git a/bsc/apps/saiph/default.nix b/garlic/saiph/default.nix similarity index 55% rename from bsc/apps/saiph/default.nix rename to garlic/saiph/default.nix index 45abfd9..4f04f40 100644 --- a/bsc/apps/saiph/default.nix +++ b/garlic/saiph/default.nix @@ -6,6 +6,8 @@ , mcxx , vtk , boost +, gitBranch ? "master" +, numComm ? 
null }: stdenv.mkDerivation rec { @@ -13,12 +15,14 @@ stdenv.mkDerivation rec { src = builtins.fetchGit { url = "ssh://git@bscpm02.bsc.es/DSLs/saiph.git"; - ref = "VectorisationSupport"; + ref = "${gitBranch}"; }; #src = /tmp/saiph; + + programPath = "/bin/ExHeat3D"; - enableParallelBuilding = true; + enableParallelBuilding = false; dontStrip = true; enableDebugging = true; @@ -31,26 +35,36 @@ stdenv.mkDerivation rec { boost ]; + hardeningDisable = [ "all" ]; + + hardeningEnable = [ "stackprotector" ]; + + postPatch = '' + + sed -i 's/^SANITIZE_FLAGS=/SANITIZE_FLAGS=$(DEBUG_FLAGS)/g' \ + saiphv2/cpp/src/Makefile.clang + ''; + preBuild = '' cd saiphv2/cpp/src - sed -i s/skylake-avx512/core-avx2/g Makefile* export VTK_VERSION=8.2 export VTK_HOME=${vtk} + export BOOST_HOME=${boost} export SAIPH_HOME=. - export NIX_CFLAGS_COMPILE+=" -fsanitize=address" ''; makeFlags = [ "-f" "Makefile.clang" "apps" - "APP=ExHeat" + "APP=ExHeat3D" + ( if (numComm != null) then "NUM_COMM=${toString numComm}" else "" ) ]; installPhase = '' mkdir -p $out/lib mkdir -p $out/bin cp obj/libsaiphv2.so $out/lib/ - cp bin/ExHeat $out/bin/ + cp bin/ExHeat3D $out/bin/ ''; } diff --git a/garlic/stages/sbatch.nix b/garlic/stages/sbatch.nix index 19a5776..8cbddfd 100644 --- a/garlic/stages/sbatch.nix +++ b/garlic/stages/sbatch.nix @@ -11,6 +11,7 @@ , binary ? "/bin/run" , ntasks ? null , ntasksPerNode ? null +, ntasksPerSocket ? null , nodes ? null , exclusive ? true # By default we run in exclusive mode , qos ? 
null @@ -54,6 +55,7 @@ stdenv.mkDerivation rec { '' + sbatchOpt "ntasks" ntasks + sbatchOpt "ntasks-per-node" ntasksPerNode + + sbatchOpt "ntasks-per-socket" ntasksPerSocket + sbatchOpt "nodes" nodes + sbatchOpt "chdir" "${chdirPrefix}/$(basename $out)" + sbatchOpt "output" output diff --git a/overlay.nix b/overlay.nix index 81f5d50..da0f5fd 100644 --- a/overlay.nix +++ b/overlay.nix @@ -92,10 +92,9 @@ let mcxx = self.bsc.mcxxGit; # Use nanos6 git by default - nanos6 = self.bsc.nanos6-git; - nanos6-latest = callPackage ./bsc/nanos6/default.nix { }; - - nanos6-git = callPackage ./bsc/nanos6/git.nix { }; + nanos6 = self.bsc.nanos6Git; + nanos6Latest = callPackage ./bsc/nanos6/default.nix { }; + nanos6Git = callPackage ./bsc/nanos6/git.nix { }; vtk = callPackage ./bsc/vtk/default.nix { inherit (self.xorg) libX11 xorgproto libXt; @@ -103,17 +102,17 @@ let dummy = callPackage ./bsc/dummy/default.nix { }; - clang-ompss2-unwrapped = callPackage ./bsc/llvm-ompss2/clang.nix { + clangOmpss2Unwrapped = callPackage ./bsc/llvm-ompss2/clang.nix { stdenv = self.llvmPackages_10.stdenv; enableDebug = false; }; - clang-ompss2 = callPackage bsc/llvm-ompss2/default.nix { - clang-ompss2-unwrapped = self.bsc.clang-ompss2-unwrapped; + clangOmpss2 = callPackage bsc/llvm-ompss2/default.nix { + clangOmpss2Unwrapped = self.bsc.clangOmpss2Unwrapped; }; stdenvOmpss2 = self.clangStdenv.override { - cc = self.bsc.clang-ompss2; + cc = self.bsc.clangOmpss2; }; cpic = callPackage ./bsc/apps/cpic/default.nix { @@ -143,6 +142,10 @@ let gitBranch = "garlic/seq"; }; + saiph = callPackage ./garlic/saiph { + stdenv = self.bsc.stdenvOmpss2; + }; + # Execution wrappers runWrappers = { sbatch = callPackage ./garlic/stages/sbatch.nix { }; @@ -183,6 +186,17 @@ let }; # mpi = callPackage ./bsc/garlic/exp/nbody/mpi.nix { }; }; + + saiph = { + numcomm = callPackage ./garlic/exp/saiph/numcomm.nix { + pkgs = self // self.bsc.garlic; + nixpkgs = import <nixpkgs>; + genApp = self.bsc.garlic.genApp; + genConfigs = 
self.bsc.garlic.genConfigs; + runWrappers = self.bsc.garlic.runWrappers; + }; + }; + osu = rec { latency-internode = callPackage ./garlic/exp/osu/latency.nix { }; latency-intranode = callPackage ./garlic/exp/osu/latency.nix {