Add saiph experiments

Rodrigo Arias 2020-09-21 17:30:24 +02:00
parent 126f05e92c
commit cc101ad1d3
12 changed files with 655 additions and 25 deletions

View File

@@ -14,7 +14,8 @@ stdenv.mkDerivation rec {
cat > $out/bin/dummy <<EOF
#!/bin/sh
echo Hello world!
echo Hello worlda!
EOF
chmod +x $out/bin/dummy

View File

@@ -76,7 +76,6 @@ stdenv.mkDerivation rec {
src = builtins.fetchGit {
url = "ssh://git@bscpm02.bsc.es/llvm-ompss/llvm-mono.git";
rev = "e1c73c3691d2685a99d99e14c6110d2c880662c6";
ref = "master";
};
}

View File

@@ -2,7 +2,7 @@
stdenv
, gcc
, nanos6
, clang-ompss2-unwrapped
, clangOmpss2Unwrapped
, wrapCCWith
}:
@@ -11,7 +11,7 @@ let
targetConfig = stdenv.targetPlatform.config;
inherit gcc nanos6;
in wrapCCWith rec {
cc = clang-ompss2-unwrapped;
cc = clangOmpss2Unwrapped;
extraBuildCommands = ''
echo "-target ${targetConfig}" >> $out/nix-support/cc-cflags
echo "-B${gcc.cc}/lib/gcc/${targetConfig}/${gcc.version}" >> $out/nix-support/cc-cflags

View File

@@ -1,5 +1,4 @@
{ stdenv
, fetchgit
, autoreconfHook
, nanos6
, gperf
@@ -13,9 +12,8 @@
}:
stdenv.mkDerivation rec {
name = "mcxx";
#version attribute ignored when using fetchgit:
#version = "2.2.0-70a299cf";
pname = "mcxx";
version = "${src.shortRev}";
passthru = {
CC = "mcc";
@@ -23,10 +21,9 @@ stdenv.mkDerivation rec {
};
# Use patched Extrae version
src = fetchgit {
src = builtins.fetchGit {
url = "https://github.com/bsc-pm/mcxx";
rev = "70a299cfeb1f96735e6b9835aee946451f1913b2";
sha256 = "1n8y0h47jm2ll67xbz930372xkl9647z12lfwz2472j3y86yxpmw";
ref = "master";
};
enableParallelBuilding = true;
@@ -55,5 +52,7 @@ stdenv.mkDerivation rec {
configureFlags = [
"--enable-ompss-2"
"--with-nanos6=${nanos6}"
# Fails with "memory exhausted" with bison 3.7.1
# "--enable-bison-regeneration"
];
}
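
Replacing fetchgit with builtins.fetchGit is what makes the version attribute above possible: the built-in fetcher needs no sha256 and exposes the fetched revision on its result, so the version can be derived from the source itself. A minimal sketch of the pattern (hypothetical repository URL and revision):

src = builtins.fetchGit {
  url = "https://github.com/example/project";
  rev = "0123456789abcdef0123456789abcdef01234567";
  ref = "master";
};
# src.shortRev is available at evaluation time:
version = "${src.shortRev}";   # e.g. "0123456"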

View File

@@ -49,6 +49,7 @@ stdenv.mkDerivation rec {
configureFlags = [
"--enable-ompss-2"
"--with-nanos6=${nanos6}"
# "--enable-bison-regeneration"
];
# Regenerate ia32 builtins to add the ones for gcc9

garlic/exp/saiph/extrae.xml (new file, 211 lines)

@@ -0,0 +1,211 @@
<?xml version='1.0'?>
<!-- Here comes the Extrae configuration.
As a general rule, "enabled" means that the feature is enabled :) If
it is not enabled, the value falls back to some default.
-->
<!-- Must we activate the tracing? What is the tracing mode (detail/bursts)? Where is it located? Which kind of trace? Which version of the XML parser? -->
<trace enabled="yes"
home="/nix/store/j80mlqa12d1baifg30jsx2smv90akzvc-extrae"
initial-mode="detail"
type="paraver"
>
<!-- Configuration of some MPI dependent values -->
<mpi enabled="yes">
<!-- Gather counters in the MPI routines? -->
<counters enabled="yes" />
</mpi>
<!-- Emit information of the callstack -->
<callers enabled="yes">
<!-- At MPI calls, select depth level -->
<mpi enabled="yes">1-3</mpi>
<!-- At sampling points, select depth level -->
<sampling enabled="yes">1-5</sampling>
<!-- At dynamic memory system calls -->
<dynamic-memory enabled="no">1-3</dynamic-memory>
<!-- At I/O system calls -->
<input-output enabled="no">1-3</input-output>
<!-- At other system calls -->
<syscall enabled="no">1-3</syscall>
</callers>
<!-- Configuration of some OpenMP dependent values -->
<openmp enabled="no" ompt="no">
<!-- If the library instruments OpenMP, shall we gather info about locks?
Obtaining such information can make the final trace quite large.
-->
<locks enabled="no" />
<!-- Gather info about taskloops? -->
<taskloop enabled="no" />
<!-- Gather counters in the OpenMP routines? -->
<counters enabled="yes" />
</openmp>
<!-- Configuration of some pthread dependent values -->
<pthread enabled="no">
<!-- If the library instruments pthread, shall we gather info about locks,
mutexes and condition variables?
Obtaining such information can make the final trace quite large.
-->
<locks enabled="no" />
<!-- Gather counters in the pthread routines? -->
<counters enabled="yes" />
</pthread>
<!-- Configuration of User Functions -->
<user-functions enabled="no" list="/home/bsc41/bsc41273/user-functions.dat" exclude-automatic-functions="no">
<!-- Gather counters on the UF routines? -->
<counters enabled="yes" />
</user-functions>
<!-- Configure which software/hardware counters must be collected -->
<counters enabled="yes">
<!-- Configure the CPU hardware counters. You can define here as many sets
as you want. You can also define if MPI/OpenMP calls must report such
counters.
Starting-set property defines which set is chosen from every task.
Possible values are:
- cyclic : The sets are distributed in a cyclic fashion among all
tasks. So Task 0 takes set 1, Task 1 takes set 2,...
- block : The sets are distributed in block fashion among all tasks.
Task [0..i-1] takes set 1, Task [i..2*i-1] takes set 2, ...
- Number : All the tasks will start with the given set
(from 1..N).
-->
<cpu enabled="yes" starting-set-distribution="1">
<!-- In this example, we configure two sets of counters. The first will
be changed into the second after 5 calls to some collective
operation on MPI_COMM_WORLD. Once the second is activated, it will
turn back into the first after 5 seconds (approx., depending on the
granularity of the MPI calls).
If you want a set to keep counting forever, simply do not set
changeat-globalops or changeat-time.
Each set has its own properties.
domain -> in which domain must PAPI obtain the information (see
PAPI info)
changeat-globalops=num -> choose the next set after num
MPI_COMM_WORLD operations
changeat-time=numTime -> choose the next set after numTime
(for example 5s, 15m (milliseconds), 10M (minutes), ...)
-->
<set enabled="yes" domain="all">
PAPI_TOT_INS,PAPI_TOT_CYC
</set>
</cpu>
<!-- Do we want to gather information of the network counters?
Nowadays we can gather information about MX/GM cards.
-->
<network enabled="no" />
<!-- Obtain resource usage information -->
<resource-usage enabled="no" />
<!-- Obtain malloc statistics -->
<memory-usage enabled="no" />
</counters>
<!-- Define the characteristics of the tracing storage. If not defined
or enabled, the traces are written to the current directory with a
default output name.
-->
<storage enabled="no">
<!-- The intermediate files will take the name of the application -->
<trace-prefix enabled="yes">TRACE</trace-prefix>
<!-- Stop the tracing when the intermediate file reaches this amount of MBs -->
<size enabled="no">5</size>
<!-- Where must we store the MPIT files while the app runs? -->
<temporal-directory enabled="yes">/scratch</temporal-directory>
<!-- Where must we store the MPIT files once the app ends? -->
<final-directory enabled="yes">/gpfs/scratch/bsc41/bsc41273</final-directory>
</storage>
<!-- Buffer configuration -->
<buffer enabled="yes">
<!-- How many events can we handle before any flush -->
<size enabled="yes">5000000</size>
<!-- Use the event buffer in a circular manner? You can use this option to
trace the last set of events. Needs MPI global routines operating on
MPI_COMM_WORLD communicator to be merged
-->
<circular enabled="no" />
</buffer>
<!-- Control tracing -->
<trace-control enabled="no">
<!-- We can start the application with a "latent tracing" and wake it up
once a control file is created. Use the property 'frequency' to
choose at which frequency this check must be done. If not supplied,
it will be checked every 100 global operations on MPI_COMM_WORLD.
-->
<file enabled="no" frequency="5M">/gpfs/scratch/bsc41/bsc41273/control</file>
<!--
-->
<global-ops enabled="no"></global-ops>
</trace-control>
<others enabled="yes">
<!-- Want to force a minimum amount of tracing time? Here we force 10
minutes -->
<minimum-time enabled="no">10M</minimum-time>
<!-- Capture the following signals to finish cleanly -->
<finalize-on-signal enabled="yes"
SIGUSR1="no" SIGUSR2="no" SIGINT="yes"
SIGQUIT="yes" SIGTERM="yes" SIGXCPU="yes"
SIGFPE="yes" SIGSEGV="yes" SIGABRT="yes"
/>
<!-- Use instrumentation points to flush the sampling buffer -->
<flush-sampling-buffer-at-instrumentation-point enabled="yes" />
</others>
<!-- Bursts library enabled? This requires a special library! -->
<bursts enabled="no">
<!-- Specify the threshold. This is mandatory! In this example, the
threshold is limited to 500 microseconds
-->
<threshold enabled="yes">500u</threshold>
<!-- Report MPI statistics? -->
<mpi-statistics enabled="yes" />
</bursts>
<!-- Enable sampling capabilities using system clock.
Type may refer to: default, real, prof and virtual.
Period stands for the sampling period (50ms here)
plus a variability of 10ms, which means periods from
45 to 55ms.
-->
<sampling enabled="no" type="default" period="50m" variability="10m" />
<!-- Enable dynamic memory instrumentation (experimental) -->
<dynamic-memory enabled="no" />
<!-- Enable I/O (read, write) instrumentation (experimental) -->
<input-output enabled="no" internals="no"/>
<!-- Enable system calls instrumentation (experimental) -->
<syscall enabled="no" />
<!-- Merge the intermediate trace files into the final trace file?
The result is named according to the binary name.
options:
synchronization = { default, task, node, no } (default is node)
max-memory = Number (in Mbytes) max memory used in merge step
joint-states = { yes, no } generate joint states?
keep-mpits = { yes, no } keep mpit files after merge?
-->
<merge enabled="yes"
synchronization="default"
tree-fan-out="16"
max-memory="512"
joint-states="yes"
keep-mpits="yes"
sort-addresses="yes"
overwrite="yes"
/>
</trace>
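
The comment above the <cpu> element describes rotating between two counter sets, but this configuration defines only one. For reference, a sketch of the two-set variant the comment describes (hypothetical counter and domain choices, using the changeat attributes documented above):

<cpu enabled="yes" starting-set-distribution="1">
  <set enabled="yes" domain="all" changeat-globalops="5">
    PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_L1_DCM
  </set>
  <set enabled="yes" domain="user" changeat-time="5s">
    PAPI_TOT_INS,PAPI_FP_INS
  </set>
</cpu>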

garlic/exp/saiph/mpi.nix (new file, 88 lines)

@@ -0,0 +1,88 @@
{
bsc
, nbody
, genApp
, genConfigs
# Wrappers
, launchWrapper
, sbatchWrapper
, srunWrapper
, argvWrapper
, controlWrapper
, nixsetupWrapper
}:
let
# Set the configuration for the experiment
config = {
cc = [ bsc.icc ];
blocksize = [ 2048 ];
mpi = [ bsc.impi bsc.openmpi bsc.mpich ];
};
extraConfig = {
particles = 32*1024;
timesteps = 10;
ntasksPerNode = 2;
nodes = 1;
time = "00:10:00";
qos = "debug";
#mpi = bsc.impi;
#mpi = bsc.openmpi;
gitBranch = "garlic/mpi+send";
gitURL = "ssh://git@bscpm02.bsc.es/garlic/apps/nbody.git";
};
# Compute the cartesian product of all configurations
configs = map (conf: conf // extraConfig) (genConfigs config);
sbatch = conf: app: sbatchWrapper {
app = app;
nixPrefix = "/gpfs/projects/bsc15/nix";
exclusive = false;
ntasksPerNode = "${toString conf.ntasksPerNode}";
nodes = "${toString conf.nodes}";
time = conf.time;
qos = conf.qos;
chdirPrefix = "/home/bsc15/bsc15557/bsc-nixpkgs/out";
};
srun = app: srunWrapper {
app = app;
nixPrefix = "/gpfs/projects/bsc15/nix";
};
argv = conf: app:
with conf;
argvWrapper {
app = app;
argv = ''(-t ${toString timesteps} -p ${toString particles})'';
env = ''
export I_MPI_THREAD_SPLIT=1
'';
};
nbodyFn = conf:
with conf;
nbody.override { inherit cc mpi blocksize gitBranch gitURL; };
pipeline = conf:
sbatch conf (
srun (
nixsetupWrapper (
argv conf (
nbodyFn conf
)
)
)
)
;
# Ideally it should look like this:
#pipeline = sbatch nixsetup control argv nbodyFn;
jobs = map pipeline configs;
in
launchWrapper jobs
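
genConfigs is what expands the config attribute set of lists into one attribute set per combination, so this experiment produces 1 (cc) x 1 (blocksize) x 3 (mpi) = 3 configurations. A minimal sketch of the assumed behaviour (hypothetical implementation; the real helper comes from the garlic tooling):

genConfigs = config:
  let
    names = builtins.attrNames config;
    # Extend every partial configuration with each value of one axis
    addAxis = partial: name:
      builtins.concatMap
        (conf: map (value: conf // { ${name} = value; }) config.${name})
        partial;
  in builtins.foldl' addAxis [ {} ] names;
# genConfigs { a = [ 1 2 ]; b = [ "x" ]; }
#   => [ { a = 1; b = "x"; } { a = 2; b = "x"; } ]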

View File

@@ -0,0 +1,136 @@
{
stdenv
, nixpkgs
, pkgs
, genApp
, genConfigs
, runWrappers
}:
with stdenv.lib;
let
# Set variable configuration for the experiment
varConfig = {
numComm = [ 1 ];
};
# Common configuration
common = {
# Compile time saiph config
gitBranch = "Saiph_TAMPI_OMPSS";
mpi = pkgs.bsc.impi;
# Resources
ntasksPerSocket = "1";
nodes = "2";
# Stage configuration
enableSbatch = true;
enableControl = true;
enableExtrae = false;
enablePerf = false;
# MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix";
};
# Compute the cartesian product of all configurations
configs = map (conf: conf // common) (genConfigs varConfig);
stageProgram = stage:
if stage ? programPath
then "${stage}${stage.programPath}" else "${stage}";
w = runWrappers;
sbatch = {stage, conf, ...}: with conf; w.sbatch {
program = stageProgram stage;
exclusive = true;
time = "02:00:00";
qos = "debug";
jobName = "saiph";
inherit nixPrefix nodes ntasksPerSocket;
};
control = {stage, conf, ...}: with conf; w.control {
program = stageProgram stage;
};
srun = {stage, conf, ...}: with conf; w.srun {
program = stageProgram stage;
srunOptions = "--cpu-bind=verbose,sockets";
inherit nixPrefix;
};
statspy = {stage, conf, ...}: with conf; w.statspy {
program = stageProgram stage;
};
perf = {stage, conf, ...}: with conf; w.perf {
program = stageProgram stage;
perfArgs = "sched record -a";
};
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
};
extrae = {stage, conf, ...}: w.extrae {
program = stageProgram stage;
traceLib = "mpi"; # mpi -> libtracempi.so
configFile = ./extrae.xml;
};
bscOverlay = import ../../../overlay.nix;
genPkgs = newOverlay: nixpkgs {
overlays = [
bscOverlay
newOverlay
];
};
# We may be able to use overlays by invoking the fix function directly, but we
# have to get the definition of the bsc packages and the garlic ones as
# overlays.
saiphFn = {stage, conf, ...}: with conf;
let
# We set the mpi implementation to the one specified in the conf, so all
# packages in bsc will use that one.
customPkgs = genPkgs (self: super: {
bsc = super.bsc // { mpi = conf.mpi; };
});
in
customPkgs.bsc.garlic.saiph.override {
inherit numComm mpi gitBranch;
};
stages = with common; []
# Use sbatch to request resources first
++ optional enableSbatch sbatch
# Repeats the next stages N times
++ optionals enableControl [ nixsetup control ]
# Executes srun to launch the program on the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace.
++ [ srun nixsetup ]
# Instrumentation with extrae
++ optional enableExtrae extrae
# Optionally profile the next stages with perf
++ optional enablePerf perf
# Execute the saiph app
++ [ saiphFn ];
# List of actual programs to be executed
jobs = map (conf: w.stagen { inherit conf stages; }) configs;
in
# We simply run each program one after another
w.launch jobs
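
The stages list above is assembled with stdenv.lib's optional and optionals, which return an empty list when their condition is false, so disabled stages simply vanish from the pipeline. A small illustration of the lib semantics this relies on (not extra code in the experiment):

with stdenv.lib;
optional false extrae                 # => [ ]
optional true  sbatch                 # => [ sbatch ]
optionals true [ nixsetup control ]   # => [ nixsetup control ]
# With the flags in common above, stages evaluates to:
#   [ sbatch nixsetup control srun nixsetup saiphFn ]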

garlic/exp/saiph/tampi.nix (new file, 165 lines)

@@ -0,0 +1,165 @@
{
stdenv
, nixpkgs
, pkgs
, genApp
, genConfigs
, runWrappers
}:
with stdenv.lib;
let
bsc = pkgs.bsc;
# Set variable configuration for the experiment
varConfig = {
cc = [ bsc.icc ];
mpi = [ bsc.impi bsc.openmpi ];
blocksize = [ 1024 ];
};
# Common configuration
common = {
# Compile time nbody config
gitBranch = "garlic/tampi+send+oss+task";
# nbody runtime options
particles = 1024*128;
timesteps = 20;
# Resources
ntasksPerNode = "48";
nodes = "1";
# Stage configuration
enableSbatch = true;
enableControl = true;
enableExtrae = false;
enablePerf = false;
enableCtf = false;
# MN4 path
nixPrefix = "/gpfs/projects/bsc15/nix";
};
# Compute the cartesian product of all configurations
configs = map (conf: conf // common) (genConfigs varConfig);
stageProgram = stage:
if stage ? programPath
then "${stage}${stage.programPath}" else "${stage}";
w = runWrappers;
sbatch = {stage, conf, ...}: with conf; w.sbatch {
program = stageProgram stage;
exclusive = true;
time = "02:00:00";
qos = "debug";
jobName = "nbody-bs";
inherit nixPrefix nodes ntasksPerNode;
};
control = {stage, conf, ...}: with conf; w.control {
program = stageProgram stage;
};
srun = {stage, conf, ...}: with conf; w.srun {
program = stageProgram stage;
srunOptions = "--cpu-bind=verbose,rank";
inherit nixPrefix;
};
statspy = {stage, conf, ...}: with conf; w.statspy {
program = stageProgram stage;
};
perf = {stage, conf, ...}: with conf; w.perf {
program = stageProgram stage;
perfArgs = "sched record -a";
};
nixsetup = {stage, conf, ...}: with conf; w.nixsetup {
program = stageProgram stage;
};
extrae = {stage, conf, ...}: w.extrae {
program = stageProgram stage;
traceLib = "mpi"; # mpi -> libtracempi.so
configFile = ./extrae.xml;
};
ctf = {stage, conf, ...}: w.argv {
program = stageProgram stage;
env = ''
export NANOS6=ctf
export NANOS6_CTF2PRV=0
'';
};
argv = {stage, conf, ...}: w.argv {
program = stageProgram stage;
env = ''
set -e
export I_MPI_THREAD_SPLIT=1
'';
argv = ''( -t ${toString conf.timesteps}
-p ${toString conf.particles} )'';
};
bscOverlay = import ../../../overlay.nix;
genPkgs = newOverlay: nixpkgs {
overlays = [
bscOverlay
newOverlay
];
};
# We may be able to use overlays by invoking the fix function directly, but we
# have to get the definition of the bsc packages and the garlic ones as
# overlays.
nbodyFn = {stage, conf, ...}: with conf;
let
# We set the mpi implementation to the one specified in the conf, so all
# packages in bsc will use that one.
customPkgs = genPkgs (self: super: {
bsc = super.bsc // { mpi = conf.mpi; };
});
in
customPkgs.bsc.garlic.nbody.override {
inherit cc blocksize mpi gitBranch;
};
stages = with common; []
# Use sbatch to request resources first
++ optional enableSbatch sbatch
# Repeats the next stages N times
++ optionals enableControl [ nixsetup control ]
# Executes srun to launch the program on the requested nodes, and
# immediately after enters the nix environment again, as slurmstepd launches
# the next stages from outside the namespace.
++ [ srun nixsetup ]
# Instrumentation with extrae
++ optional enableExtrae extrae
# Optionally profile the next stages with perf
++ optional enablePerf perf
# Optionally profile nanos6 with the new ctf
++ optional enableCtf ctf
# Execute the nbody app with the argv and env vars
++ [ argv nbodyFn ];
# List of actual programs to be executed
jobs = map (conf: w.stagen { inherit conf stages; }) configs;
in
# We simply run each program one after another
w.launch jobs
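
As the comments above note, the per-configuration MPI selection works by instantiating a fresh nixpkgs with the bsc overlay plus one extra overlay that rewires bsc.mpi, so the substitution reaches every package depending on it rather than just the top-level app. A minimal sketch of the mechanism in isolation (hypothetical selection):

customPkgs = genPkgs (self: super: {
  bsc = super.bsc // { mpi = super.bsc.openmpi; };
});
# Now customPkgs.bsc.mpi == customPkgs.bsc.openmpi, and packages such as
# customPkgs.bsc.garlic.nbody pick up openmpi through their bsc.mpi
# dependency when evaluated.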

View File

@@ -6,6 +6,8 @@
, mcxx
, vtk
, boost
, gitBranch ? "master"
, numComm ? null
}:
stdenv.mkDerivation rec {
@@ -13,12 +15,14 @@ stdenv.mkDerivation rec {
src = builtins.fetchGit {
url = "ssh://git@bscpm02.bsc.es/DSLs/saiph.git";
ref = "VectorisationSupport";
ref = "${gitBranch}";
};
#src = /tmp/saiph;
programPath = "/bin/ExHeat3D";
enableParallelBuilding = true;
enableParallelBuilding = false;
dontStrip = true;
enableDebugging = true;
@@ -31,26 +35,36 @@ stdenv.mkDerivation rec {
boost
];
hardeningDisable = [ "all" ];
hardeningEnable = [ "stackprotector" ];
postPatch = ''
sed -i 's/^SANITIZE_FLAGS=/SANITIZE_FLAGS=$(DEBUG_FLAGS)/g' \
saiphv2/cpp/src/Makefile.clang
'';
preBuild = ''
cd saiphv2/cpp/src
sed -i s/skylake-avx512/core-avx2/g Makefile*
export VTK_VERSION=8.2
export VTK_HOME=${vtk}
export BOOST_HOME=${boost}
export SAIPH_HOME=.
export NIX_CFLAGS_COMPILE+=" -fsanitize=address"
'';
makeFlags = [
"-f" "Makefile.clang"
"apps"
"APP=ExHeat"
"APP=ExHeat3D"
( if (numComm != null) then "NUM_COMM=${toString numComm}" else "" )
];
installPhase = ''
mkdir -p $out/lib
mkdir -p $out/bin
cp obj/libsaiphv2.so $out/lib/
cp bin/ExHeat $out/bin/
cp bin/ExHeat3D $out/bin/
'';
}
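
When numComm is null, the conditional in makeFlags above leaves an empty string in the list, which stdenv tolerates but the usual nixpkgs idiom avoids. A sketch of the equivalent with lib.optional (assuming stdenv.lib is in scope here):

makeFlags = [
  "-f" "Makefile.clang"
  "apps"
  "APP=ExHeat3D"
] ++ stdenv.lib.optional (numComm != null) "NUM_COMM=${toString numComm}";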

View File

@@ -11,6 +11,7 @@
, binary ? "/bin/run"
, ntasks ? null
, ntasksPerNode ? null
, ntasksPerSocket ? null
, nodes ? null
, exclusive ? true # By default we run in exclusive mode
, qos ? null
@@ -54,6 +55,7 @@ stdenv.mkDerivation rec {
''
+ sbatchOpt "ntasks" ntasks
+ sbatchOpt "ntasks-per-node" ntasksPerNode
+ sbatchOpt "ntasks-per-socket" ntasksPerSocket
+ sbatchOpt "nodes" nodes
+ sbatchOpt "chdir" "${chdirPrefix}/$(basename $out)"
+ sbatchOpt "output" output
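
The new ntasks-per-socket line reuses the same sbatchOpt helper as its neighbours. Judging from these call sites, sbatchOpt presumably renders one "#SBATCH" directive per non-null value; a hypothetical sketch consistent with that usage:

sbatchOpt = option: value:
  if value == null
  then ""
  else "#SBATCH --${option}=${toString value}\n";
# sbatchOpt "ntasks-per-socket" ntasksPerSocket
#   => "#SBATCH --ntasks-per-socket=1\n"   (with ntasksPerSocket = "1")
# sbatchOpt "qos" null => ""               (option omitted entirely)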

View File

@@ -92,10 +92,9 @@ let
mcxx = self.bsc.mcxxGit;
# Use nanos6 git by default
nanos6 = self.bsc.nanos6-git;
nanos6-latest = callPackage ./bsc/nanos6/default.nix { };
nanos6-git = callPackage ./bsc/nanos6/git.nix { };
nanos6 = self.bsc.nanos6Git;
nanos6Latest = callPackage ./bsc/nanos6/default.nix { };
nanos6Git = callPackage ./bsc/nanos6/git.nix { };
vtk = callPackage ./bsc/vtk/default.nix {
inherit (self.xorg) libX11 xorgproto libXt;
@@ -103,17 +102,17 @@ let
dummy = callPackage ./bsc/dummy/default.nix { };
clang-ompss2-unwrapped = callPackage ./bsc/llvm-ompss2/clang.nix {
clangOmpss2Unwrapped = callPackage ./bsc/llvm-ompss2/clang.nix {
stdenv = self.llvmPackages_10.stdenv;
enableDebug = false;
};
clang-ompss2 = callPackage bsc/llvm-ompss2/default.nix {
clang-ompss2-unwrapped = self.bsc.clang-ompss2-unwrapped;
clangOmpss2 = callPackage bsc/llvm-ompss2/default.nix {
clangOmpss2Unwrapped = self.bsc.clangOmpss2Unwrapped;
};
stdenvOmpss2 = self.clangStdenv.override {
cc = self.bsc.clang-ompss2;
cc = self.bsc.clangOmpss2;
};
cpic = callPackage ./bsc/apps/cpic/default.nix {
@@ -143,6 +142,10 @@ let
gitBranch = "garlic/seq";
};
saiph = callPackage ./garlic/saiph {
stdenv = self.bsc.stdenvOmpss2;
};
# Execution wrappers
runWrappers = {
sbatch = callPackage ./garlic/stages/sbatch.nix { };
@@ -183,6 +186,17 @@ let
};
# mpi = callPackage ./bsc/garlic/exp/nbody/mpi.nix { };
};
saiph = {
numcomm = callPackage ./garlic/exp/saiph/numcomm.nix {
pkgs = self // self.bsc.garlic;
nixpkgs = import <nixpkgs>;
genApp = self.bsc.garlic.genApp;
genConfigs = self.bsc.garlic.genConfigs;
runWrappers = self.bsc.garlic.runWrappers;
};
};
osu = rec {
latency-internode = callPackage ./garlic/exp/osu/latency.nix { };
latency-intranode = callPackage ./garlic/exp/osu/latency.nix {