Add extrae and perf stages
This commit is contained in:
parent
d05d32edbf
commit
d469ccd59d
@ -14,7 +14,7 @@ stdenv.mkDerivation {
|
||||
mkdir -p $out/bin
|
||||
cat > $out/bin/run <<EOF
|
||||
#!/bin/sh
|
||||
set -e
|
||||
#set -e
|
||||
for n in {1..30}; do
|
||||
$program/bin/run
|
||||
done
|
||||
|
@ -31,6 +31,14 @@ let
|
||||
controlWrapper = callPackage ./control.nix { };
|
||||
nixsetupWrapper = callPackage ./nix-setup.nix { };
|
||||
argvWrapper = callPackage ./argv.nix { };
|
||||
statspyWrapper = callPackage ./statspy.nix { };
|
||||
extraeWrapper = callPackage ./extrae.nix { };
|
||||
|
||||
# Perf is tied to a linux kernel specific version
|
||||
linuxPackages = bsc.linuxPackages_4_4;
|
||||
perfWrapper = callPackage ./perf.nix {
|
||||
perf = linuxPackages.perf;
|
||||
};
|
||||
|
||||
exp = {
|
||||
nbody = {
|
||||
|
@ -11,6 +11,9 @@
|
||||
, argvWrapper
|
||||
, controlWrapper
|
||||
, nixsetupWrapper
|
||||
, statspyWrapper
|
||||
, extraeWrapper
|
||||
, perfWrapper
|
||||
}:
|
||||
|
||||
let
|
||||
@ -24,7 +27,7 @@ let
|
||||
gitBranch = "garlic/mpi+send";
|
||||
mpi = bsc.impi;
|
||||
particles = 1024*128;
|
||||
timesteps = 10;
|
||||
timesteps = 100;
|
||||
ntasksPerNode = "48";
|
||||
nodes = "1";
|
||||
time = "02:00:00";
|
||||
@ -46,6 +49,7 @@ let
|
||||
srun = app: srunWrapper {
|
||||
app = app;
|
||||
nixPrefix = "/gpfs/projects/bsc15/nix";
|
||||
srunOptions = "--cpu-bind=verbose,rank";
|
||||
};
|
||||
|
||||
argv = conf: app:
|
||||
@ -59,18 +63,47 @@ let
|
||||
argv = ''(-t ${toString timesteps} -p ${toString particles})'';
|
||||
};
|
||||
|
||||
statspy = app:
|
||||
statspyWrapper {
|
||||
app = app;
|
||||
};
|
||||
|
||||
extrae = app:
|
||||
extraeWrapper {
|
||||
app = app;
|
||||
traceLib = "mpi";
|
||||
configFile = ./extrae.xml;
|
||||
};
|
||||
|
||||
perf = app:
|
||||
perfWrapper {
|
||||
app = app;
|
||||
perfArgs = "sched record -a";
|
||||
};
|
||||
|
||||
nbodyFn = conf:
|
||||
with conf;
|
||||
nbody.override { inherit cc mpi blocksize gitBranch; };
|
||||
|
||||
pipeline = conf:
|
||||
sbatch conf (
|
||||
nixsetupWrapper (
|
||||
controlWrapper (
|
||||
# sbatch conf (
|
||||
# nixsetupWrapper (
|
||||
# controlWrapper (
|
||||
srun (
|
||||
nixsetupWrapper (
|
||||
# extrae (
|
||||
# perf (
|
||||
argv conf (
|
||||
nbodyFn conf))))));
|
||||
nbodyFn conf
|
||||
)
|
||||
# )
|
||||
# )
|
||||
)
|
||||
)
|
||||
# )
|
||||
# )
|
||||
# )
|
||||
;
|
||||
|
||||
# Ideally it should look like this:
|
||||
#pipeline = sbatch nixsetup control argv nbodyFn;
|
||||
|
211
bsc/garlic/exp/nbody/extrae.xml
Normal file
211
bsc/garlic/exp/nbody/extrae.xml
Normal file
@ -0,0 +1,211 @@
|
||||
<?xml version='1.0'?>
|
||||
|
||||
<!-- Here comes the Extrae configuration.
|
||||
As a general rule, "enabled" means that the feature is enabled :) If
|
||||
it's not enabled, then the value can be set to some default.
|
||||
-->
|
||||
|
||||
<!-- Must we activate the tracing? Which is the tracing mode? (detail/bursts) Where is it located? Which kind of trace? Version of the XML parser?-->
|
||||
<trace enabled="yes"
|
||||
home="/nix/store/j80mlqa12d1baifg30jsx2smv90akzvc-extrae"
|
||||
initial-mode="detail"
|
||||
type="paraver"
|
||||
>
|
||||
|
||||
<!-- Configuration of some MPI dependent values -->
|
||||
<mpi enabled="yes">
|
||||
<!-- Gather counters in the MPI routines? -->
|
||||
<counters enabled="yes" />
|
||||
</mpi>
|
||||
|
||||
<!-- Emit information of the callstack -->
|
||||
<callers enabled="yes">
|
||||
<!-- At MPI calls, select depth level -->
|
||||
<mpi enabled="yes">1-3</mpi>
|
||||
<!-- At sampling points, select depth level -->
|
||||
<sampling enabled="yes">1-5</sampling>
|
||||
<!-- At dynamic memory system calls -->
|
||||
<dynamic-memory enabled="no">1-3</dynamic-memory>
|
||||
<!-- At I/O system calls -->
|
||||
<input-output enabled="no">1-3</input-output>
|
||||
<!-- At other system calls -->
|
||||
<syscall enabled="no">1-3</syscall>
|
||||
</callers>
|
||||
|
||||
<!-- Configuration of some OpenMP dependent values -->
|
||||
<openmp enabled="no" ompt="no">
|
||||
<!-- If the library instruments OpenMP, shall we gather info about locks?
|
||||
Obtaining such information can make the final trace quite large.
|
||||
-->
|
||||
<locks enabled="no" />
|
||||
<!-- Gather info about taskloops? -->
|
||||
<taskloop enabled="no" />
|
||||
<!-- Gather counters in the OpenMP routines? -->
|
||||
<counters enabled="yes" />
|
||||
</openmp>
|
||||
|
||||
<!-- Configuration of some pthread dependent values -->
|
||||
<pthread enabled="no">
|
||||
<!-- If the library instruments pthread, shall we gather info about locks,
|
||||
mutexes and condition variables?
|
||||
Obtaining such information can make the final trace quite large.
|
||||
-->
|
||||
<locks enabled="no" />
|
||||
<!-- Gather counters in the pthread routines? -->
|
||||
<counters enabled="yes" />
|
||||
</pthread>
|
||||
|
||||
<!-- Configuration of User Functions -->
|
||||
<user-functions enabled="no" list="/home/bsc41/bsc41273/user-functions.dat" exclude-automatic-functions="no">
|
||||
<!-- Gather counters on the UF routines? -->
|
||||
<counters enabled="yes" />
|
||||
</user-functions>
|
||||
|
||||
<!-- Configure which software/hardware counters must be collected -->
|
||||
<counters enabled="yes">
|
||||
<!-- Configure the CPU hardware counters. You can define here as many sets
|
||||
as you want. You can also define if MPI/OpenMP calls must report such
|
||||
counters.
|
||||
Starting-set property defines which set is chosen from every task.
|
||||
Possible values are:
|
||||
- cyclic : The sets are distributed in a cyclic fashion among all
|
||||
tasks. So Task 0 takes set 1, Task 1 takes set 2,...
|
||||
- block : The sets are distributed in block fashion among all tasks.
|
||||
Task [0..i-1] takes set 1, Task [i..2*i-1] takes set 2, ...
|
||||
- Number : All the tasks will start with the given set
|
||||
(from 1..N).
|
||||
-->
|
||||
<cpu enabled="yes" starting-set-distribution="1">
|
||||
<!-- In this example, we configure two sets of counters. The first will
|
||||
be changed into the second after 5 calls to some collective
|
||||
operation on MPI_COMM_WORLD. Once the second is activated, it will
|
||||
turn to the first after 5 seconds (approx., depending on the MPI calls
|
||||
granularity)
|
||||
If you want that any set be counting forever, just don't set
|
||||
changeat-globalops, or, changeat-time.
|
||||
|
||||
Each set has its own properties.
|
||||
domain -> in which domain must PAPI obtain the information (see
|
||||
PAPI info)
|
||||
changeat-globalops=num -> choose the next set after num
|
||||
MPI_COMM_WORLD operations
|
||||
changeat-time=numTime -> choose the next set after num Time
|
||||
(for example 5s, 15m (for ms), 10M (for minutes),..)
|
||||
-->
|
||||
<set enabled="yes" domain="all">
|
||||
PAPI_TOT_INS,PAPI_TOT_CYC
|
||||
</set>
|
||||
</cpu>
|
||||
|
||||
<!-- Do we want to gather information of the network counters?
|
||||
Nowadays we can gather information about MX/GM cards.
|
||||
-->
|
||||
<network enabled="no" />
|
||||
|
||||
<!-- Obtain resource usage information -->
|
||||
<resource-usage enabled="no" />
|
||||
|
||||
<!-- Obtain malloc statistics -->
|
||||
<memory-usage enabled="no" />
|
||||
</counters>
|
||||
|
||||
<!-- Define the characteristics of the tracing storage. If not defined,
|
||||
or set, the tracing will send the traces to the current directory
|
||||
with a default output name.
|
||||
-->
|
||||
<storage enabled="no">
|
||||
<!-- The intermediate files will take the name of the application -->
|
||||
<trace-prefix enabled="yes">TRACE</trace-prefix>
|
||||
<!-- Stop the tracing when the intermediate file reaches this amount of MBs -->
|
||||
<size enabled="no">5</size>
|
||||
<!-- Where must we store the MPIT files while the app runs? -->
|
||||
<temporal-directory enabled="yes">/scratch</temporal-directory>
|
||||
<!-- Where must we store the MPIT files once the app ends? -->
|
||||
<final-directory enabled="yes">/gpfs/scratch/bsc41/bsc41273</final-directory>
|
||||
</storage>
|
||||
|
||||
<!-- Buffer configuration -->
|
||||
<buffer enabled="yes">
|
||||
<!-- How many events can we handle before any flush -->
|
||||
<size enabled="yes">5000000</size>
|
||||
<!-- Use the event buffer in a circular manner? You can use this option to
|
||||
trace the last set of events. Needs MPI global routines operating on
|
||||
MPI_COMM_WORLD communicator to be merged
|
||||
-->
|
||||
<circular enabled="no" />
|
||||
</buffer>
|
||||
|
||||
<!-- Control tracing -->
|
||||
<trace-control enabled="no">
|
||||
<!-- We can start the application with a "latent tracing" and wake it up
|
||||
once a control file is created. Use the property 'frequency' to
|
||||
choose at which frequency this check must be done. If not supplied,
|
||||
it will be checked every 100 global operations on MPI_COMM_WORLD.
|
||||
-->
|
||||
<file enabled="no" frequency="5M">/gpfs/scratch/bsc41/bsc41273/control</file>
|
||||
<!--
|
||||
-->
|
||||
<global-ops enabled="no"></global-ops>
|
||||
</trace-control>
|
||||
|
||||
<others enabled="yes">
|
||||
<!-- Want to force a minimum amount of time of tracing? Here we force 10
|
||||
minutes -->
|
||||
<minimum-time enabled="no">10M</minimum-time>
|
||||
<!-- Capture the following signals to finish cleanly -->
|
||||
<finalize-on-signal enabled="yes"
|
||||
SIGUSR1="no" SIGUSR2="no" SIGINT="yes"
|
||||
SIGQUIT="yes" SIGTERM="yes" SIGXCPU="yes"
|
||||
SIGFPE="yes" SIGSEGV="yes" SIGABRT="yes"
|
||||
/>
|
||||
<!-- Use instrumentation points to flush sampling buffer -->
|
||||
<flush-sampling-buffer-at-instrumentation-point enabled="yes" />
|
||||
</others>
|
||||
|
||||
|
||||
<!-- Bursts library enabled? This requires a special library! -->
|
||||
<bursts enabled="no">
|
||||
<!-- Specify the threshold. This is mandatory! In this example, the
|
||||
threshold is limited to 500 microseconds
|
||||
-->
|
||||
<threshold enabled="yes">500u</threshold>
|
||||
<!-- Report MPI statistics? -->
|
||||
<mpi-statistics enabled="yes" />
|
||||
</bursts>
|
||||
|
||||
<!-- Enable sampling capabilities using system clock.
|
||||
Type may refer to: default, real, prof and virtual.
|
||||
Period stands for the sampling period (50ms here)
|
||||
plus a variability of 10ms, which means periods from
|
||||
45 to 55ms.
|
||||
-->
|
||||
<sampling enabled="no" type="default" period="50m" variability="10m" />
|
||||
|
||||
<!-- Enable dynamic memory instrumentation (experimental) -->
|
||||
<dynamic-memory enabled="no" />
|
||||
|
||||
<!-- Enable I/O (read, write) instrumentation (experimental) -->
|
||||
<input-output enabled="no" internals="no"/>
|
||||
|
||||
<!-- Enable system calls instrumentation (experimental) -->
|
||||
<syscall enabled="no" />
|
||||
|
||||
<!-- Do merge the intermediate tracefiles into the final tracefile?
|
||||
Named according to the binary name
|
||||
options:
|
||||
synchronization = { default, task, node, no } (default is node)
|
||||
max-memory = Number (in Mbytes) max memory used in merge step
|
||||
joint-states = { yes, no } generate joint states?
|
||||
keep-mpits = { yes, no } keep mpit files after merge?
|
||||
-->
|
||||
<merge enabled="yes"
|
||||
synchronization="default"
|
||||
tree-fan-out="16"
|
||||
max-memory="512"
|
||||
joint-states="yes"
|
||||
keep-mpits="yes"
|
||||
sort-addresses="yes"
|
||||
overwrite="yes"
|
||||
/>
|
||||
|
||||
</trace>
|
@ -64,7 +64,17 @@ let
|
||||
with conf;
|
||||
bsc.osumb.override { inherit mpi; };
|
||||
|
||||
pipeline = conf: sbatch conf (srun (nixsetupWrapper (argv (osumbFn conf))));
|
||||
|
||||
pipeline = conf:
|
||||
sbatch conf (
|
||||
nixsetupWrapper (
|
||||
controlWrapper (
|
||||
srun (
|
||||
nixsetupWrapper (
|
||||
argv (
|
||||
osumbFn conf))))));
|
||||
|
||||
#pipeline = conf: sbatch conf (srun (nixsetupWrapper (argv (osumbFn conf))));
|
||||
#pipeline = conf: sbatch conf (srun (nixsetupWrapper (argv bsc.osumb)));
|
||||
|
||||
# Ideally it should look like this:
|
||||
|
30
bsc/garlic/extrae.nix
Normal file
30
bsc/garlic/extrae.nix
Normal file
@ -0,0 +1,30 @@
|
||||
# Wrapper stage that runs a program with an Extrae tracing library preloaded.
#
# Outer arguments (package inputs): stdenv, bash, extrae.
#
# Inner arguments:
#   app        - derivation that provides the program to wrap
#   traceLib   - infix selecting the tracing library; the file
#                ${extrae}/lib/lib${traceLib}trace.so is preloaded
#   configFile - path exported as EXTRAE_CONFIG_FILE
#   program    - path of the executable, relative to ${app}
#
# The result is a derivation whose only output file is $out/bin/run.
{
  stdenv
, bash
, extrae
}:

{
  app
, traceLib ? "mpi"
, configFile
, program ? "bin/run"
}:

stdenv.mkDerivation {
  name = "${app.name}-extrae";
  preferLocalBuild = true;
  phases = [ "installPhase" ];

  # The heredoc delimiter is unquoted, so the build shell expands any bare
  # `$VAR` while writing the script. LD_PRELOAD is escaped (\$) so that it is
  # expanded when the wrapper runs, preserving whatever the caller already
  # preloads, instead of being replaced by the build-time value.
  installPhase = ''
    mkdir -p $out/bin
    cat > $out/bin/run <<EOF
    #!${bash}/bin/bash

    export EXTRAE_HOME=${extrae}
    export LD_PRELOAD=${extrae}/lib/lib${traceLib}trace.so:\$LD_PRELOAD
    export EXTRAE_CONFIG_FILE=${configFile}
    exec ${app}/${program}
    EOF
    chmod +x $out/bin/run
  '';
}
|
@ -12,10 +12,12 @@ with stdenv.lib;
|
||||
stdenv.mkDerivation rec {
|
||||
name = "nbody";
|
||||
|
||||
src = builtins.fetchGit {
|
||||
url = "${gitURL}";
|
||||
ref = "${gitBranch}";
|
||||
};
|
||||
src = /home/Computational/rarias/bsc-nixpkgs/manual/nbody;
|
||||
|
||||
#src = builtins.fetchGit {
|
||||
# url = "${gitURL}";
|
||||
# ref = "${gitBranch}";
|
||||
#};
|
||||
|
||||
buildInputs = [
|
||||
cc
|
||||
|
26
bsc/garlic/perf.nix
Normal file
26
bsc/garlic/perf.nix
Normal file
@ -0,0 +1,26 @@
|
||||
# Wrapper stage that launches a program under `perf`.
#
# Outer arguments (package inputs): stdenv, bash, perf.
#
# Inner arguments:
#   app      - derivation that provides the program to wrap
#   perfArgs - arguments placed between the perf binary and the program
#   program  - path of the executable, relative to ${app}
#
# The result is a derivation whose only output file is $out/bin/run.
{ stdenv, bash, perf }:

{ app, perfArgs ? "record -a", program ? "bin/run" }:

let
  # Absolute paths interpolated into the generated script.
  perfBin = "${perf}/bin/perf";
  target = "${app}/${program}";
in
stdenv.mkDerivation {
  name = "${app.name}-perf";

  phases = [ "installPhase" ];
  preferLocalBuild = true;

  # Writes a small bash script that replaces itself (exec) with perf running
  # the wrapped program.
  installPhase = ''
    mkdir -p $out/bin
    cat > $out/bin/run <<EOF
    #!${bash}/bin/bash

    exec ${perfBin} ${perfArgs} ${target}
    EOF
    chmod +x $out/bin/run
  '';
}
|
@ -39,6 +39,7 @@ stdenv.mkDerivation rec {
|
||||
src = ./.;
|
||||
|
||||
buildInputs = [ app ];
|
||||
phases = [ "installPhase" ];
|
||||
|
||||
#SBATCH --tasks-per-node=48
|
||||
#SBATCH --ntasks-per-socket=24
|
||||
|
@ -1,7 +1,11 @@
|
||||
{
|
||||
stdenv
|
||||
}:
|
||||
{ app , nixPrefix ? "" }:
|
||||
{
|
||||
app
|
||||
, nixPrefix ? ""
|
||||
, srunOptions ? ""
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
name = "${app.name}-srun";
|
||||
@ -13,7 +17,7 @@ stdenv.mkDerivation rec {
|
||||
mkdir -p $out/bin
|
||||
cat > $out/bin/run <<EOF
|
||||
#!/bin/sh
|
||||
exec srun --mpi=pmi2 ${nixPrefix}${app}/bin/run
|
||||
exec srun --mpi=pmi2 ${srunOptions} ${nixPrefix}${app}/bin/run
|
||||
EOF
|
||||
chmod +x $out/bin/run
|
||||
'';
|
||||
|
29
bsc/garlic/statspy.nix
Normal file
29
bsc/garlic/statspy.nix
Normal file
@ -0,0 +1,29 @@
|
||||
# Wrapper stage that snapshots /proc/<pid>/stat for every process before and
# after running a program.
#
# Outer arguments (package inputs): stdenv, bash.
#
# Inner arguments:
#   app       - derivation that provides the program to wrap
#   outputDir - directory (created if missing) that receives the snapshots,
#               named statspy.<timestamp>.begin and statspy.<timestamp>.end
#   program   - path of the executable, relative to ${app}
#
# The result is a derivation whose only output file is $out/bin/run.
{
  stdenv
, bash
}:

{
  app
, outputDir ? "."
, program ? "bin/run"
}:

stdenv.mkDerivation {
  name = "${app.name}-statspy";
  preferLocalBuild = true;
  phases = [ "installPhase" ];

  # Shell expansions that must happen when the wrapper runs (not while it is
  # being generated) are escaped with \$ because the heredoc is unquoted.
  # The wrapped program's exit status is saved and returned, so that a failing
  # program is not masked by the status of the final snapshot pipeline.
  installPhase = ''
    mkdir -p $out/bin
    cat > $out/bin/run <<EOF
    #!${bash}/bin/bash

    mkdir -p ${outputDir}
    cat /proc/[0-9]*/stat | sort -n > ${outputDir}/statspy.\$(date +%s.%3N).begin
    ${app}/${program}
    status=\$?
    cat /proc/[0-9]*/stat | sort -n > ${outputDir}/statspy.\$(date +%s.%3N).end

    exit \$status
    EOF
    chmod +x $out/bin/run
  '';
}
|
Loading…
Reference in New Issue
Block a user