heat: split granularity with extended mode
The HWC version is not yet complete.
This commit is contained in:
parent
3566cf0152
commit
d1c32869c1
@ -5,10 +5,16 @@
|
||||
, targetMachine
|
||||
, stages
|
||||
, garlicTools
|
||||
, writeText
|
||||
, enablePerf ? false
|
||||
, enableCTF ? false
|
||||
, enableHWC ? false
|
||||
, enableExtended ? false
|
||||
}:
|
||||
|
||||
# TODO: Finish HWC first
|
||||
assert (enableHWC == false);
|
||||
|
||||
with stdenv.lib;
|
||||
with garlicTools;
|
||||
|
||||
@ -17,10 +23,6 @@ let
|
||||
varConf = with bsc; {
|
||||
cbs = range2 32 4096;
|
||||
rbs = range2 32 4096;
|
||||
#cbs = [ 64 256 1024 4096 ];
|
||||
#rbs = [ 32 128 512 1024 ];
|
||||
#cbs = [ 4096 ];
|
||||
#rbs = [ 512 ];
|
||||
};
|
||||
|
||||
machineConfig = targetMachine.config;
|
||||
@ -38,12 +40,11 @@ let
|
||||
timesteps = 10;
|
||||
cols = 1024 * 16; # Columns
|
||||
rows = 1024 * 16; # Rows
|
||||
cbs = c.cbs;
|
||||
rbs = c.rbs;
|
||||
inherit (c) cbs rbs;
|
||||
gitBranch = "garlic/tampi+isend+oss+task";
|
||||
|
||||
# Repeat the execution of each unit 30 times
|
||||
loops = 1;
|
||||
loops = 10;
|
||||
|
||||
# Resources
|
||||
qos = "debug";
|
||||
@ -55,10 +56,20 @@ let
|
||||
jobName = unitName;
|
||||
};
|
||||
|
||||
filterConfigs = c: let
|
||||
# Too small sizes lead to huge overheads
|
||||
goodSize = (c.cbs * c.rbs >= 1024);
|
||||
# When the extended units are not enabled, we only select those in
|
||||
# the diagonal.
|
||||
extended = if (enableExtended) then true
|
||||
else c.cbs == c.rbs;
|
||||
in
|
||||
goodSize && extended;
|
||||
|
||||
# Compute the array of configurations
|
||||
configs = stdexp.buildConfigs {
|
||||
configs = filter (filterConfigs) (stdexp.buildConfigs {
|
||||
inherit varConf genConf;
|
||||
};
|
||||
});
|
||||
|
||||
perf = {nextStage, conf, ...}: stages.perf {
|
||||
inherit nextStage;
|
||||
@ -66,54 +77,71 @@ let
|
||||
"-e cycles,instructions,cache-references,cache-misses";
|
||||
};
|
||||
|
||||
ctf = {nextStage, conf, ...}: with conf; stages.exec {
|
||||
ctf = {nextStage, conf, ...}: let
|
||||
# Create the nanos6 configuration file
|
||||
nanos6ConfigFile = writeText "nanos6.toml" ''
|
||||
version.instrument = "ctf"
|
||||
turbo.enabled = false
|
||||
instrument.ctf.converter.enabled = false
|
||||
'' + optionalString (enableHWC) ''
|
||||
hardware_counters.papi.enabled = true
|
||||
hardware_counters.papi.counters = [
|
||||
"PAPI_TOT_INS", "PAPI_TOT_CYC",
|
||||
"PAPI_L1_TCM", "PAPI_L2_TCM", "PAPI_L3_TCM"
|
||||
]
|
||||
'';
|
||||
|
||||
in stages.exec {
|
||||
inherit nextStage;
|
||||
|
||||
# And use it
|
||||
env = ''
|
||||
export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf,\
|
||||
instrument.ctf.converter.enabled=false"
|
||||
export NANOS6_CONFIG=${nanos6ConfigFile}
|
||||
'';
|
||||
# Only one process converts the trace, otherwise use:
|
||||
# if [ $SLURM_PROCID == 0 ]; then
|
||||
# ...
|
||||
# fi
|
||||
post = ''
|
||||
if [ $SLURM_PROCID == 0 ]; then
|
||||
sleep 2
|
||||
for tracedir in trace_*; do
|
||||
offset=$(grep 'offset =' $tracedir/ctf/ust/uid/1000/64-bit/metadata | \
|
||||
grep -o '[0-9]*')
|
||||
echo "offset = $offset"
|
||||
|
||||
start_time=$(awk '/^start_time / {print $2}' stdout.log)
|
||||
end_time=$(awk '/^end_time / {print $2}' stdout.log)
|
||||
# FIXME: We should run a hook *after* srun has ended, so we can
|
||||
# execute it in one process only (not in N ranks). This hack works
|
||||
# with one process only. Or be able to compute the name of the trace
|
||||
# directory so we can begin the conversion in parallel
|
||||
post = assert (conf.nodes * conf.ntasksPerNode == 1); ''
|
||||
tracedir=$(ls -d trace_* | head -1)
|
||||
echo "using tracedir=$tracedir"
|
||||
|
||||
begin=$(awk "BEGIN{print $start_time*1e9 - $offset}")
|
||||
end=$(awk "BEGIN{print $end_time*1e9 - $offset}")
|
||||
offset=$(grep 'offset =' $tracedir/ctf/ust/uid/1000/64-bit/metadata | \
|
||||
grep -o '[0-9]*')
|
||||
echo "offset = $offset"
|
||||
|
||||
echo "only events between $begin and $end"
|
||||
start_time=$(awk '/^start_time / {print $2}' stdout.log)
|
||||
end_time=$(awk '/^end_time / {print $2}' stdout.log)
|
||||
|
||||
${bsc.cn6}/bin/cn6 -s $tracedir
|
||||
begin=$(awk "BEGIN{print $start_time*1e9 - $offset}")
|
||||
end=$(awk "BEGIN{print $end_time*1e9 - $offset}")
|
||||
|
||||
${bsc.cn6}/bin/cut $begin $end < $tracedir/prv/trace.prv |\
|
||||
${bsc.cn6}/bin/hcut 1 ${toString conf.cpusPerTask} \
|
||||
> $tracedir/prv/trace-cut.prv
|
||||
echo "only events between $begin and $end"
|
||||
|
||||
${bsc.cn6}/bin/dur 6400025 0 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_dead.csv &
|
||||
${bsc.cn6}/bin/cn6 -s $tracedir
|
||||
|
||||
${bsc.cn6}/bin/dur 6400025 1 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_runtime.csv &
|
||||
${bsc.cn6}/bin/cut $begin $end < $tracedir/prv/trace.prv |\
|
||||
${bsc.cn6}/bin/hcut 1 ${toString conf.cpusPerTask} \
|
||||
> $tracedir/prv/trace-cut.prv
|
||||
|
||||
${bsc.cn6}/bin/dur 6400025 3 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_task.csv &
|
||||
${bsc.cn6}/bin/dur 6400025 0 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_dead.csv &
|
||||
|
||||
wait
|
||||
${bsc.cn6}/bin/dur 6400025 1 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_runtime.csv &
|
||||
|
||||
# Remove the traces at the end, as they are huge
|
||||
rm -rf $tracedir
|
||||
done
|
||||
fi
|
||||
'';
|
||||
${bsc.cn6}/bin/dur 6400025 3 < $tracedir/prv/trace-cut.prv |\
|
||||
awk '{s+=$1} END {print s}' >> .garlic/time_mode_task.csv &
|
||||
|
||||
wait
|
||||
|
||||
# Remove the traces at the end, as they are huge
|
||||
rm -rf $tracedir
|
||||
'';
|
||||
# TODO: To enable HWC we need to first add a taskwait before the
|
||||
# first get_time() measurement, otherwise we get the HWC of the
|
||||
# main task, which will be huge.
|
||||
};
|
||||
|
||||
exec = {nextStage, conf, ...}: stages.exec {
|
||||
|
Loading…
Reference in New Issue
Block a user