fwi: adjust input size to meet timing constraints
The previous input size for both the granularity and strong scaling tests was too big to meet the timing constraints needed for garlic. This patch sets a new, smaller input size. Also, a minor cleanup is applied to the rest of the fwi experiments and figures.
@@ -1,3 +1,23 @@
# This test compares a FWI version using poor data locality (+NOREUSE) versus
# the optimized version (used for all other experiments). A pseudocode snippet
# follows, illustrating the fundamental difference between the versions.
#
# NOREUSE
# ----------------------
# for (y) for (x) for (z)
#   computA(v[y][x][z]);
# for (y) for (x) for (z)
#   computB(v[y][x][z]);
# for (y) for (x) for (z)
#   computC(v[y][x][z]);
#
# Optimized version
# ----------------------
# for (y) for (x) for (z)
#   computA(v[y][x][z]);
#   computB(v[y][x][z]);
#   computC(v[y][x][z]);
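#
# In the optimized version each element v[y][x][z] is presumably still in
# cache when computB and computC touch it, whereas NOREUSE streams the
# whole volume through memory three times, once per kernel.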

{
  stdenv
, stdexp

@@ -15,34 +35,14 @@ let
  # Initial variable configuration
  varConf = {
    gitBranch = [
      # "garlic/tampi+send+oss+task"
      # "garlic/mpi+send+omp+task"
      "garlic/mpi+send+oss+task"
      "garlic/mpi+send+oss+task+noreuse"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
      "garlic/mpi+send+oss+task+NOREUSE"
    ];

    blocksize = [ 1 2 4 8 ];
    #blocksize = [ 1 2 ];

    n = [
      # {nx=50; ny=4000; nz=50;}
      # {nx=20; ny=4000; nz=20;}
      # {nx=300; ny=8000; nz=300;} # half node, /
      # {nx=300; ny=1000; nz=300;} # half node, /
      # {nx=200; ny=1000; nz=200;} # half node, not enough tasks
      # {nx=200; ny=4000; nz=200;} # --/ half node
      # {nx=250; ny=2000; nz=250;} # / half node
      {nx=300; ny=2000; nz=300;} # / half node
      # {nx=100; ny=2000; nz=100;} # \-// half node
      # {nx=150; ny=2000; nz=150;} # \-/ half node
      # {nx=200; ny=64000; nz=200;} # --/ 16 nodes
      # {nx=200; ny=4000; nz=200;} # --/ half node
      # {nx=200; ny=8000; nz=200;} # --/ 1 node
      # {nx=100; ny=8000; nz=100;} # --/ half node
    ];
  };

@@ -1,3 +1,5 @@
# Regular granularity test for FWI

{
  stdenv
, stdexp

@@ -15,20 +17,20 @@ let
  # Initial variable configuration
  varConf = {
    gitBranch = [
      "garlic/tampi+send+oss+task"
      "garlic/tampi+isend+oss+task"
      "garlic/mpi+send+omp+task"
      "garlic/mpi+send+oss+task"
      # "garlic/tampi+send+oss+task"
      "garlic/tampi+isend+oss+task"
      # "garlic/mpi+send+omp+task"
      # "garlic/mpi+send+oss+task"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
    ];

    blocksize = [ 1 2 4 8 16 32 ];
    blocksize = [ 1 2 4 8 16 32 64 128 256 ];
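    # blocksize presumably controls how much work each task computes: small
    # values create many fine-grained tasks (more runtime overhead), large
    # values create few coarse tasks (less parallelism). Sweeping 1..256
    # brackets the efficient range used by the scaling experiments.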

    n = [
      {nx=500; nz=500; ny=2000; ntpn=2; nn=1;}
      {nx=100; nz=100; ny=8000; ntpn=2; nn=1;}
    ];
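    # ntpn and nn presumably encode the tasks per node and the node count
    # to use with each input size, alongside nx, ny and nz.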

  };
@@ -1,138 +0,0 @@
{
  stdenv
, stdexp
, bsc
, targetMachine
, stages
}:

with stdenv.lib;

let

  inherit (targetMachine) fs;

  # Initial variable configuration
  varConf = {
    gitBranch = [
      # "garlic/tampi+send+oss+task"
      # "garlic/mpi+send+omp+task"
      "garlic/mpi+send+oss+task"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
    ];

    blocksize = [ 1 ];

    n = [
      # {nx=500; nz=500; ny=8000;}
      {nx=500; nz=500; ny=2000;}
    ];

    nodes = [ 1 ];

    numactl = [ true false ];
  };

  # The c value contains something like:
  # {
  #   n = { nx=500; ny=500; nz=500; }
  #   blocksize = 1;
  #   gitBranch = "garlic/tampi+send+oss+task";
  # }

  machineConfig = targetMachine.config;

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "fwi";
    unitName = "${expName}-test";
    inherit (machineConfig) hw;

    cc = icc;
    inherit (c) gitBranch blocksize;
    useNumactl = c.numactl;

    #nx = c.n.nx;
    #ny = c.n.ny;
    #nz = c.n.nz;

    # Same but shorter:
    inherit (c.n) nx ny nz;

    fwiInput = bsc.apps.fwi.input.override {
      inherit (c.n) nx ny nz;
    };

    # Other FWI parameters
    ioFreq = -1;

    # Repeat the execution of each unit several times
    loops = 10;
    #loops = 1;

    # Resources
    cpusPerTask = if (useNumactl) then hw.cpusPerNode else hw.cpusPerSocket;
    ntasksPerNode = hw.cpusPerNode / cpusPerTask;
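    # Worked example with hypothetical hardware values: on a node with 48
    # CPUs split across 2 sockets of 24, useNumactl gives cpusPerTask = 48
    # and ntasksPerNode = 48 / 48 = 1; otherwise cpusPerTask = 24 and
    # ntasksPerNode = 48 / 24 = 2, i.e. one task per socket.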
    nodes = c.nodes;
    qos = "debug";
    time = "02:00:00";
    jobName = unitName;

    tracing = "no";

    # Enable permissions to write in the local storage
    extraMounts = [ fs.local.temp ];

  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };
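  # buildConfigs presumably expands varConf as a cartesian product: every
  # combination of gitBranch, blocksize, n, nodes and numactl is passed as
  # the c argument of genConf, yielding one experiment unit per combination.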

  exec = {nextStage, conf, ...}: stages.exec ({
    inherit nextStage;
    pre = ''
      CDIR=$PWD
      if [[ "${conf.tracing}" == "yes" ]]; then
        export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf"
      fi
      EXECDIR="${fs.local.temp}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
      mkdir -p $EXECDIR
      cd $EXECDIR
      ln -fs ${conf.fwiInput}/InputModels InputModels || true
    '';
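    # The pre hook above runs before the solver: it creates a per-run
    # directory in the node-local storage and symlinks the input models
    # there, so each unit reads and writes locally.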
    argv = [
      "${conf.fwiInput}/fwi_params.txt"
      "${conf.fwiInput}/fwi_frequencies.txt"
      conf.blocksize
      "-1" # Forward steps
      "-1" # Backward steps
      conf.ioFreq # Write/read frequency
    ];
    post = ''
      rm -rf Results || true
      if [[ "${conf.tracing}" == "yes" ]]; then
        mv trace_* $CDIR
      fi
    '';
  } // optionalAttrs (conf.useNumactl) {
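    # numactl --interleave=all allocates memory pages round-robin across
    # all NUMA nodes, so a single task spanning both sockets does not
    # concentrate its working set on one memory controller.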
    program = "${numactl}/bin/numactl --interleave=all ${stageProgram nextStage}";
  });

  apps = bsc.garlic.apps;

  # FWI program
  program = {nextStage, conf, ...}: apps.fwi.solver.override {
    inherit (conf) cc gitBranch fwiInput;
  };

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in

stdexp.genExperiment { inherit configs pipeline; }

@@ -1,3 +1,6 @@
# Strong scaling test for FWI variants based on fork-join parallelism. This
# experiment does not rely on block sizes.

{
  stdenv
, stdexp

@@ -15,20 +18,13 @@ let
  # Initial variable configuration
  varConf = {
    gitBranch = [
      # "garlic/tampi+send+oss+task"
      # "garlic/mpi+send+omp+task"
      # "garlic/mpi+send+oss+task"
      "garlic/mpi+send+omp+fork"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
    ];

    blocksize = [ 0 ];

    n = [
      {nx=500; nz=500; ny=16000;}
      {nx=100; nz=100; ny=8000;}
    ];

    nodes = [ 1 2 4 8 16 ];
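    # Strong scaling: the input size n stays fixed while the node count
    # grows from 1 to 16, so each variant's speedup is presumably measured
    # against its own single-node run.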

@@ -1,3 +1,10 @@
# Strong scaling test for FWI variants based on tasks with and without I/O.
# This experiment solves a computationally expensive input which brings the
# storage devices to saturation when I/O is enabled. The same input is run
# without I/O for comparison purposes. Also, the experiments are run for a
# range of block sizes deemed efficient according to the granularity
# experiment.

{
  stdenv
, stdexp
@@ -1,3 +1,7 @@
# Strong scaling test for FWI variants based exclusively on MPI. This
# experiment does not rely on block sizes. An MPI process is instantiated per
# core.

{
  stdenv
, stdexp

@@ -15,24 +19,17 @@ let
  # Initial variable configuration
  varConf = {
    gitBranch = [
      # "garlic/tampi+send+oss+task"
      # "garlic/mpi+send+omp+task"
      # "garlic/mpi+send+oss+task"
      # "garlic/mpi+send+omp+fork"
      "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
    ];

    blocksize = [ 0 ];

    n = [
      {nx=500; nz=500; ny=16000;}
      {nx=100; nz=100; ny=8000;}
    ];

    # Not enough planes for 8 and 16 nodes
    nodes = [ 1 2 4 ];
    # Not enough planes for 4, 8 and 16 nodes
    nodes = [ 1 2 ];
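    # With one MPI rank per core, the ny planes are presumably split among
    # all ranks; beyond the listed node counts there would be fewer planes
    # than ranks, hence the reduced node list.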

  };

@@ -1,3 +1,7 @@
# Strong scaling test for FWI variants based on tasks. This
# experiment explores a range of block sizes deemed efficient
# according to the granularity experiment.

{
  stdenv
, stdexp

@@ -19,16 +23,12 @@ let
      "garlic/tampi+isend+oss+task"
      "garlic/mpi+send+omp+task"
      "garlic/mpi+send+oss+task"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"
      # "garlic/omp+task"
      # "garlic/seq"
    ];

    blocksize = [ 1 2 4 8 ];
    blocksize = [ 1 2 4 8 16 ];

    n = [
      {nx=500; nz=500; ny=16000;}
      {nx=100; nz=100; ny=8000;}
    ];

    nodes = [ 1 2 4 8 16 ];
@@ -1,3 +1,7 @@
# This experiment compares the effect of not using I/O versus using O_DIRECT |
# O_DSYNC enabled I/O. This is a reduced version of the strong_scaling_io
# experiment.

{
  stdenv
, stdexp

@@ -15,8 +19,8 @@ let
  # Initial variable configuration
  varConf = {
    gitBranch = [
      # "garlic/tampi+send+oss+task"
      "garlic/mpi+send+omp+task"
      "garlic/tampi+send+oss+task"
      # "garlic/mpi+send+omp+task"
      # "garlic/mpi+send+oss+task"
      # "garlic/mpi+send+seq"
      # "garlic/oss+task"

@@ -24,14 +28,16 @@ let
      # "garlic/seq"
    ];

    blocksize = [ 1 2 4 8 16 32 ];
    #blocksize = [ 1 2 4 8 ];
    blocksize = [ 1 ];

    n = [
      #{nx=500; nz=500; ny=1000; ntpn=1; nn=1;}
      {nx=500; nz=500; ny=2000; ntpn=2; nn=1;}
      {nx=500; nz=500; ny=16000;}
    ];

    nodes = [ 4 ];

    ioFreq = [ 9999 (-1) ];
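    # ioFreq = -1 disables I/O entirely, as in the other experiments, while
    # 9999 presumably enables the O_DIRECT | O_DSYNC I/O path this
    # experiment measures (the value sets the write/read frequency).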

  };

  # The c value contains something like:

@@ -57,14 +63,14 @@ let
    #nz = c.n.nz;

    # Same but shorter:
    inherit (c.n) nx ny nz ntpn nn;
    inherit (c.n) nx ny nz;

    fwiInput = bsc.apps.fwi.input.override {
      inherit (c.n) nx ny nz;
    };

    # Other FWI parameters
    ioFreq = -1;
    ioFreq = c.ioFreq;

    # Repeat the execution of each unit several times
    loops = 10;

@@ -72,8 +78,8 @@ let

    # Resources
    cpusPerTask = hw.cpusPerSocket;
    ntasksPerNode = ntpn;
    nodes = nn;
    ntasksPerNode = 2;
    nodes = c.nodes;
    qos = "debug";
    time = "02:00:00";
    jobName = unitName;
@@ -98,12 +98,13 @@
  };

  fwi = {
    test = callPackage ./fwi/test.nix { };
    granularity = callPackage ./fwi/granularity.nix { };
    strong_scaling_task = callPackage ./fwi/strong_scaling_task.nix { };
    strong_scaling_forkjoin = callPackage ./fwi/strong_scaling_forkjoin.nix { };
    strong_scaling_mpionly = callPackage ./fwi/strong_scaling_mpionly.nix { };
    data_reuse = callPackage ./fwi/data_reuse.nix { };
    strong_scaling_io = callPackage ./fwi/strong_scaling_io.nix { };
    granularity = callPackage ./fwi/granularity.nix { };
    sync_io = callPackage ./fwi/sync_io.nix { };
  };

  osu = rec {