fwi: generate the model in every node
Because we are using node-local storage, every node needs its own copy of the input. The current method is to run the generator only in the rank that has CPU 0 assigned in its CPU affinity mask, so it runs once per node.
This commit is contained in:
parent
58dc277d3d
commit
99beac9b23
@ -40,67 +40,70 @@ rec {
|
||||
ntasksPerNode = hw.cpusPerNode;
|
||||
};
|
||||
|
||||
srun = {nextStage, conf, ...}:
|
||||
exec = {nextStage, conf, ...}:
|
||||
let
|
||||
fwiParams = bsc.apps.fwi.params.override {
|
||||
inherit (conf) nx ny nz;
|
||||
};
|
||||
in
|
||||
stdexp.stdStages.srun {
|
||||
inherit nextStage conf;
|
||||
# Now we add some commands to execute before calling srun. These will
|
||||
# only run in one rank (the first in the list of allocated nodes)
|
||||
preSrun = ''
|
||||
export GARLIC_FWI_SRUNDIR=$(pwd)
|
||||
export GARLIC_FWI_EXECDIR="${conf.tempDir}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
|
||||
mkdir -p "$GARLIC_FWI_EXECDIR"
|
||||
|
||||
export GARLIC_FWI_PARAMS="${fwiParams}/fwi_params.txt"
|
||||
export GARLIC_FWI_FREQ="${fwiParams}/fwi_frequencies.txt"
|
||||
|
||||
# We cannot change the working directory of srun, so we use a
|
||||
# subshell to ignore the cd
|
||||
(
|
||||
# Generate the input dataset
|
||||
>&2 echo "generating the input dataset"
|
||||
cd "$GARLIC_FWI_EXECDIR"
|
||||
${fwiParams}/bin/ModelGenerator \
|
||||
-m "$GARLIC_FWI_PARAMS" "$GARLIC_FWI_FREQ"
|
||||
)
|
||||
'';
|
||||
|
||||
postSrun = optionalString (conf.enableCTF) ''
|
||||
# Save the traces
|
||||
mv "$GARLIC_FWI_EXECDIR"/trace_* .
|
||||
'' + ''
|
||||
# Remove everything else
|
||||
rm -rf "$GARLIC_FWI_EXECDIR"
|
||||
'';
|
||||
};
|
||||
|
||||
exec = {nextStage, conf, ...}: stages.exec {
|
||||
in stages.exec {
|
||||
inherit nextStage;
|
||||
|
||||
# FIXME: FWI should allow the I/O directory to be specified as a
|
||||
# parameter
|
||||
pre = ''
|
||||
# Run fwi at the in a directory with fast local storage
|
||||
cd "$GARLIC_FWI_EXECDIR"
|
||||
FWI_SRUNDIR=$(pwd)
|
||||
FWI_EXECDIR="${conf.tempDir}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
|
||||
FWI_PARAMS="${fwiParams}/fwi_params.txt"
|
||||
FWI_FREQ="${fwiParams}/fwi_frequencies.txt"
|
||||
|
||||
# Run fwi in a directory with fast local storage
|
||||
mkdir -p "$FWI_EXECDIR"
|
||||
cd "$FWI_EXECDIR"
|
||||
|
||||
# Only generate the input if we have the CPU 0 (once per node)
|
||||
if grep -o 'Cpus_allowed_list:[[:space:]]0' \
|
||||
/proc/self/status > /dev/null;
|
||||
then
|
||||
FWI_CAPTAIN=1
|
||||
fi
|
||||
|
||||
if [ $FWI_CAPTAIN ]; then
|
||||
>&2 echo "generating the input dataset"
|
||||
${fwiParams}/bin/ModelGenerator -m "$FWI_PARAMS" "$FWI_FREQ"
|
||||
fi
|
||||
|
||||
echo >&2 "Current dir: $(pwd)"
|
||||
echo >&2 "Using PARAMS=$GARLIC_FWI_PARAMS and FREQ=$GARLIC_FWI_FREQ"
|
||||
echo >&2 "Using PARAMS=$FWI_PARAMS and FREQ=$FWI_FREQ"
|
||||
'' + optionalString (conf.enableCTF) ''
|
||||
export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf"
|
||||
'';
|
||||
|
||||
argv = [
|
||||
''"$GARLIC_FWI_PARAMS"''
|
||||
''"$GARLIC_FWI_FREQ"''
|
||||
''"$FWI_PARAMS"''
|
||||
''"$FWI_FREQ"''
|
||||
] ++ optional (needsBlocksize conf) conf.blocksize ++ [
|
||||
"-1" # Forward steps
|
||||
"-1" # Backward steps
|
||||
conf.ioFreq # Write/read frequency
|
||||
];
|
||||
|
||||
post = ''
|
||||
# Go back to the garlic out directory
|
||||
cd "$FWI_SRUNDIR"
|
||||
|
||||
if [ $FWI_CAPTAIN ]; then
|
||||
'' + optionalString (conf.enableCTF) ''
|
||||
# FIXME: We should specify the path in the nanos6 config, so we
|
||||
# can avoid the race condition while they are generating the
|
||||
# traces
|
||||
sleep 3
|
||||
|
||||
# Save the traces
|
||||
mv "$FWI_EXECDIR"/trace_* .
|
||||
'' + ''
|
||||
rm -rf "$FWI_EXECDIR"
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
apps = bsc.garlic.apps;
|
||||
@ -117,8 +120,5 @@ rec {
|
||||
inherit fwiParams;
|
||||
};
|
||||
|
||||
pipeline = stdexp.stdPipelineOverride {
|
||||
# Replace the standard srun stage with our own
|
||||
overrides = { inherit srun; };
|
||||
} ++ [ exec program ];
|
||||
pipeline = stdexp.stdPipeline ++ [ exec program ];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user