fwi: adjust input size to meet timing constraints

The previous input size for both granularity and strong scaling tests
was too big to meet the timing constraints needed for garlic. This
patch sets a new, smaller, input size.

Also, a minor cleanup is applied to the rest of the fwi experiments
and figures.
This commit is contained in:
Aleix Roca Nonell 2021-04-07 12:35:44 +02:00
parent 3e5a56ebdb
commit 989f6ee018
14 changed files with 96 additions and 267 deletions

View File

@ -34,6 +34,7 @@ stdenv.mkDerivation rec {
# FIXME: Allow multiple MPI implementations # FIXME: Allow multiple MPI implementations
postPatch = '' postPatch = ''
sed -i 's/= OPENMPI$/= INTEL/g' Makefile sed -i 's/= OPENMPI$/= INTEL/g' Makefile
sed -i 's/USE_O_DIRECT ?= NO/USE_O_DIRECT ?= YES/g' Makefile || true
''; '';
# FIXME: This is an ugly hack. # FIXME: This is an ugly hack.

View File

@ -1,3 +1,23 @@
# This test compares an FWI version using poor data locality (+NOREUSE) versus
# the optimized version (used for all other experiments). Below is a pseudocode
# snippet illustrating the fundamental difference between the two versions.
#
# NOREUSE
# ----------------------
# for (y) for (x) for (z)
# computA(v[y][x][z]);
# for (y) for (x) for (z)
# computB(v[y][x][z]);
# for (y) for (x) for (z)
# computC(v[y][x][z]);
#
# Optimized version
# ----------------------
# for (y) for (x) for (z)
# computA(v[y][x][z]);
# computB(v[y][x][z]);
# computC(v[y][x][z]);
{ {
stdenv stdenv
, stdexp , stdexp
@ -15,34 +35,14 @@ let
# Initial variable configuration # Initial variable configuration
varConf = { varConf = {
gitBranch = [ gitBranch = [
# "garlic/tampi+send+oss+task"
# "garlic/mpi+send+omp+task"
"garlic/mpi+send+oss+task" "garlic/mpi+send+oss+task"
"garlic/mpi+send+oss+task+noreuse" "garlic/mpi+send+oss+task+NOREUSE"
# "garlic/mpi+send+seq"
# "garlic/oss+task"
# "garlic/omp+task"
# "garlic/seq"
]; ];
blocksize = [ 1 2 4 8 ]; blocksize = [ 1 2 4 8 ];
#blocksize = [ 1 2 ];
n = [ n = [
# {nx=50; ny=4000; nz=50;}
# {nx=20; ny=4000; nz=20;}
# {nx=300; ny=8000; nz=300;} # half node, /
# {nx=300; ny=1000; nz=300;} # half node, /
# {nx=200; ny=1000; nz=200;} # half node, not enough tasks
# {nx=200; ny=4000; nz=200;} # --/ half node
# {nx=250; ny=2000; nz=250;} # / half node
{nx=300; ny=2000; nz=300;} # / half node {nx=300; ny=2000; nz=300;} # / half node
# {nx=100; ny=2000; nz=100;} # \-// half node
# {nx=150; ny=2000; nz=150;} # \-/ half node
# {nx=200; ny=64000; nz=200;} # --/ 16 nodes
# {nx=200; ny=4000; nz=200;} # --/ half node
# {nx=200; ny=8000; nz=200;} # --/ 1 node
# {nx=100; ny=8000; nz=100;} # --/ half node
]; ];
}; };

View File

@ -1,3 +1,5 @@
# Regular granularity test for FWI
{ {
stdenv stdenv
, stdexp , stdexp
@ -15,20 +17,20 @@ let
# Initial variable configuration # Initial variable configuration
varConf = { varConf = {
gitBranch = [ gitBranch = [
"garlic/tampi+send+oss+task" # "garlic/tampi+send+oss+task"
"garlic/tampi+isend+oss+task" "garlic/tampi+isend+oss+task"
"garlic/mpi+send+omp+task" # "garlic/mpi+send+omp+task"
"garlic/mpi+send+oss+task" # "garlic/mpi+send+oss+task"
# "garlic/mpi+send+seq" # "garlic/mpi+send+seq"
# "garlic/oss+task" # "garlic/oss+task"
# "garlic/omp+task" # "garlic/omp+task"
# "garlic/seq" # "garlic/seq"
]; ];
blocksize = [ 1 2 4 8 16 32 ]; blocksize = [ 1 2 4 8 16 32 64 128 256 ];
n = [ n = [
{nx=500; nz=500; ny=2000; ntpn=2; nn=1;} {nx=100; nz=100; ny=8000; ntpn=2; nn=1;}
]; ];
}; };

View File

@ -1,138 +0,0 @@
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
}:
with stdenv.lib;
let
inherit (targetMachine) fs;
# Initial variable configuration
varConf = {
gitBranch = [
# "garlic/tampi+send+oss+task"
# "garlic/mpi+send+omp+task"
"garlic/mpi+send+oss+task"
# "garlic/mpi+send+seq"
# "garlic/oss+task"
# "garlic/omp+task"
# "garlic/seq"
];
blocksize = [ 1 ];
n = [
# {nx=500; nz=500; ny=8000;}
{nx=500; nz=500; ny=2000;}
];
nodes = [ 1 ]
numactl = [ true false ]
};
# The c value contains something like:
# {
# n = { nx=500; ny=500; nz=500; }
# blocksize = 1;
# gitBranch = "garlic/tampi+send+oss+task";
# }
machineConfig = targetMachine.config;
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
expName = "fwi";
unitName = "${expName}-test";
inherit (machineConfig) hw;
cc = icc;
inherit (c) gitBranch blocksize;
useNumactl = c.numactl
#nx = c.n.nx;
#ny = c.n.ny;
#nz = c.n.nz;
# Same but shorter:
inherit (c.n) nx ny nz;
fwiInput = bsc.apps.fwi.input.override {
inherit (c.n) nx ny nz;
};
# Other FWI parameters
ioFreq = -1;
# Repeat the execution of each unit several times
loops = 10;
#loops = 1;
# Resources
cpusPerTask = if (useNumactl) then hw.cpusPerNode else hw.cpusPerSocket;
ntasksPerNode = hw.cpusPerNode / cpusPerTask;
nodes = c.nodes;
qos = "debug";
time = "02:00:00";
jobName = unitName;
tracing = "no";
# Enable permissions to write in the local storage
extraMounts = [ fs.local.temp ];
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
exec = {nextStage, conf, ...}: stages.exec ({
inherit nextStage;
pre = ''
CDIR=$PWD
if [[ "${conf.tracing}" == "yes" ]]; then
export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf"
fi
EXECDIR="${fs.local.temp}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
mkdir -p $EXECDIR
cd $EXECDIR
ln -fs ${conf.fwiInput}/InputModels InputModels || true
'';
argv = [
"${conf.fwiInput}/fwi_params.txt"
"${conf.fwiInput}/fwi_frequencies.txt"
conf.blocksize
"-1" # Fordward steps
"-1" # Backward steps
conf.ioFreq # Write/read frequency
];
post = ''
rm -rf Results || true
if [[ "${conf.tracing}" == "yes" ]]; then
mv trace_* $CDIR
fi
'';
} // optionalAttrs (conf.useNumact) {
program = "${numactl}/bin/numactl --interleave=all ${stageProgram nextStage}";
});
apps = bsc.garlic.apps;
# FWI program
program = {nextStage, conf, ...}: apps.fwi.solver.override {
inherit (conf) cc gitBranch fwiInput;
};
pipeline = stdexp.stdPipeline ++ [ exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -1,3 +1,6 @@
# Strong scaling test for FWI variants based on fork-join. This
# experiment does not rely on block sizes.
{ {
stdenv stdenv
, stdexp , stdexp
@ -15,20 +18,13 @@ let
# Initial variable configuration # Initial variable configuration
varConf = { varConf = {
gitBranch = [ gitBranch = [
# "garlic/tampi+send+oss+task"
# "garlic/mpi+send+omp+task"
# "garlic/mpi+send+oss+task"
"garlic/mpi+send+omp+fork" "garlic/mpi+send+omp+fork"
# "garlic/mpi+send+seq"
# "garlic/oss+task"
# "garlic/omp+task"
# "garlic/seq"
]; ];
blocksize = [ 0 ]; blocksize = [ 0 ];
n = [ n = [
{nx=500; nz=500; ny=16000;} {nx=100; nz=100; ny=8000;}
]; ];
nodes = [ 1 2 4 8 16 ]; nodes = [ 1 2 4 8 16 ];

View File

@ -1,3 +1,10 @@
# Strong scaling test for FWI variants based on tasks with and without I/O.
# This experiment solves a computationally expensive input which brings the
# storage devices to saturation when I/O is enabled. The same input is run
# without I/O for comparison purposes. Also, the experiments are run for a
# range of block sizes deemed as efficient according to the granularity
# experiment.
{ {
stdenv stdenv
, stdexp , stdexp

View File

@ -1,3 +1,7 @@
# Strong scaling test for FWI variants based exclusively on MPI. This
# experiment does not rely on block sizes. An MPI process is instantiated per
# core.
{ {
stdenv stdenv
, stdexp , stdexp
@ -15,24 +19,17 @@ let
# Initial variable configuration # Initial variable configuration
varConf = { varConf = {
gitBranch = [ gitBranch = [
# "garlic/tampi+send+oss+task"
# "garlic/mpi+send+omp+task"
# "garlic/mpi+send+oss+task"
# "garlic/mpi+send+omp+fork"
"garlic/mpi+send+seq" "garlic/mpi+send+seq"
# "garlic/oss+task"
# "garlic/omp+task"
# "garlic/seq"
]; ];
blocksize = [ 0 ]; blocksize = [ 0 ];
n = [ n = [
{nx=500; nz=500; ny=16000;} {nx=100; nz=100; ny=8000;}
]; ];
# Not enough planes for 8 and 16 nodes # Not enough planes for 4, 8 and 16 nodes
nodes = [ 1 2 4 ]; nodes = [ 1 2 ];
}; };

View File

@ -1,3 +1,7 @@
# Strong scaling test for FWI variants based on tasks. This
# experiment explores a range of block sizes deemed as efficient
# according to the granularity experiment.
{ {
stdenv stdenv
, stdexp , stdexp
@ -19,16 +23,12 @@ let
"garlic/tampi+isend+oss+task" "garlic/tampi+isend+oss+task"
"garlic/mpi+send+omp+task" "garlic/mpi+send+omp+task"
"garlic/mpi+send+oss+task" "garlic/mpi+send+oss+task"
# "garlic/mpi+send+seq"
# "garlic/oss+task"
# "garlic/omp+task"
# "garlic/seq"
]; ];
blocksize = [ 1 2 4 8 ]; blocksize = [ 1 2 4 8 16 ];
n = [ n = [
{nx=500; nz=500; ny=16000;} {nx=100; nz=100; ny=8000;}
]; ];
nodes = [ 1 2 4 8 16 ]; nodes = [ 1 2 4 8 16 ];

View File

@ -1,3 +1,7 @@
# This experiment compares the effect of not using I/O versus using O_DIRECT |
# O_DSYNC enabled I/O. This is a reduced version of the strong_scaling_io
# experiment.
{ {
stdenv stdenv
, stdexp , stdexp
@ -15,8 +19,8 @@ let
# Initial variable configuration # Initial variable configuration
varConf = { varConf = {
gitBranch = [ gitBranch = [
# "garlic/tampi+send+oss+task" "garlic/tampi+send+oss+task"
"garlic/mpi+send+omp+task" # "garlic/mpi+send+omp+task"
# "garlic/mpi+send+oss+task" # "garlic/mpi+send+oss+task"
# "garlic/mpi+send+seq" # "garlic/mpi+send+seq"
# "garlic/oss+task" # "garlic/oss+task"
@ -24,14 +28,16 @@ let
# "garlic/seq" # "garlic/seq"
]; ];
blocksize = [ 1 2 4 8 16 32 ]; blocksize = [ 1 ];
#blocksize = [ 1 2 4 8 ];
n = [ n = [
#{nx=500; nz=500; ny=1000; ntpn=1; nn=1;} {nx=500; nz=500; ny=16000;}
{nx=500; nz=500; ny=2000; ntpn=2; nn=1;}
]; ];
nodes = [ 4 ];
ioFreq = [ 9999 (-1) ];
}; };
# The c value contains something like: # The c value contains something like:
@ -57,14 +63,14 @@ let
#nz = c.n.nz; #nz = c.n.nz;
# Same but shorter: # Same but shorter:
inherit (c.n) nx ny nz ntpn nn; inherit (c.n) nx ny nz;
fwiInput = bsc.apps.fwi.input.override { fwiInput = bsc.apps.fwi.input.override {
inherit (c.n) nx ny nz; inherit (c.n) nx ny nz;
}; };
# Other FWI parameters # Other FWI parameters
ioFreq = -1; ioFreq = c.ioFreq;
# Repeat the execution of each unit several times # Repeat the execution of each unit several times
loops = 10; loops = 10;
@ -72,8 +78,8 @@ let
# Resources # Resources
cpusPerTask = hw.cpusPerSocket; cpusPerTask = hw.cpusPerSocket;
ntasksPerNode = ntpn; ntasksPerNode = 2;
nodes = nn; nodes = c.nodes;
qos = "debug"; qos = "debug";
time = "02:00:00"; time = "02:00:00";
jobName = unitName; jobName = unitName;

View File

@ -98,12 +98,13 @@
}; };
fwi = { fwi = {
test = callPackage ./fwi/test.nix { }; granularity = callPackage ./fwi/granularity.nix { };
strong_scaling_task = callPackage ./fwi/strong_scaling_task.nix { }; strong_scaling_task = callPackage ./fwi/strong_scaling_task.nix { };
strong_scaling_forkjoin = callPackage ./fwi/strong_scaling_forkjoin.nix { }; strong_scaling_forkjoin = callPackage ./fwi/strong_scaling_forkjoin.nix { };
strong_scaling_mpionly = callPackage ./fwi/strong_scaling_mpionly.nix { }; strong_scaling_mpionly = callPackage ./fwi/strong_scaling_mpionly.nix { };
data_reuse = callPackage ./fwi/data_reuse.nix { };
strong_scaling_io = callPackage ./fwi/strong_scaling_io.nix { }; strong_scaling_io = callPackage ./fwi/strong_scaling_io.nix { };
granularity = callPackage ./fwi/granularity.nix { }; sync_io = callPackage ./fwi/sync_io.nix { };
}; };
osu = rec { osu = rec {

View File

@ -30,7 +30,7 @@ w=5
#################################################################### ####################################################################
### Line Graph ### Line Graph
#################################################################### ####################################################################
png("time.png", width=w*ppi, height=h*ppi, res=ppi) png("mtime.png", width=w*ppi, height=h*ppi, res=ppi)
## Create the plot with the normalized time vs nblocks ## Create the plot with the normalized time vs nblocks
p = ggplot(df, aes(x = blocksize, y=mtime, group=gitBranch, color=gitBranch)) + p = ggplot(df, aes(x = blocksize, y=mtime, group=gitBranch, color=gitBranch)) +
@ -49,22 +49,23 @@ print(p)
dev.off() dev.off()
#################################################################### ####################################################################
### Boxplot ### Line Graph
#################################################################### ####################################################################
png("box.png", width=w*ppi, height=h*ppi, res=ppi) png("time.png", width=w*ppi, height=h*ppi, res=ppi)
# Create the plot with the normalized time vs nblocks
p = ggplot(df, aes(x=blocksize, y=time, group=gitBranch, colour=gitBranch)) + ## Create the plot with the normalized time vs nblocks
# Labels p = ggplot(df, aes(x = blocksize, y=time, group=gitBranch, color=gitBranch)) +
labs(x="Blocksize", y="Normalized time", geom_point() +
title=sprintf("FWI Time"), geom_line() +
subtitle=input_file) +
# Draw boxplots
geom_boxplot() +
theme_bw() + theme_bw() +
labs(x="Blocksize", y="Time (s)", title="FWI granularity",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8)) + theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88)) theme(legend.position = c(0.5, 0.88))
# Render the plot # Render the plot
print(p) print(p)
## Save the png image
# Save the png image
dev.off() dev.off()

View File

@ -14,7 +14,7 @@ dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten() jsonlite::flatten()
# Select block size to display # Select block size to display
useBlocksize = 1 useBlocksize = 2
# We only need the nblocks and time # We only need the nblocks and time
df = select(dataset, config.blocksize, config.gitBranch, config.nodes, time) %>% df = select(dataset, config.blocksize, config.gitBranch, config.nodes, time) %>%
@ -59,7 +59,7 @@ print(p)
dev.off() dev.off()
#################################################################### ####################################################################
### Line plot (timei x nodes) ### Line plot (time x nodes)
#################################################################### ####################################################################
png("nxtime.png", width=w*ppi, height=h*ppi, res=ppi) png("nxtime.png", width=w*ppi, height=h*ppi, res=ppi)

View File

@ -1,46 +0,0 @@
library(ggplot2)
library(dplyr)
library(scales)
library(jsonlite)
args=commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1]
input_file = "input.json"
if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten()
# We only need the nblocks and time
df = select(dataset, config.blocksize, config.gitBranch, time) %>%
rename(blocksize=config.blocksize, gitBranch=config.gitBranch) %>%
group_by(blocksize, gitBranch) %>%
mutate(mtime = median(time)) %>%
ungroup()
df$gitBranch = as.factor(df$gitBranch)
df$blocksize = as.factor(df$blocksize)
ppi=300
h=5
w=5
png("time.png", width=w*ppi, height=h*ppi, res=ppi)
#
## Create the plot with the normalized time vs nblocks
p = ggplot(df, aes(x=blocksize, y=time)) +
geom_point() +
geom_line(aes(y=mtime, group=gitBranch, color=gitBranch)) +
theme_bw() +
labs(x="Blocksize", y="Time (s)", title="FWI granularity",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88))
# Render the plot
print(p)
# Save the png image
dev.off()

View File

@ -62,10 +62,12 @@ in
}; };
fwi = with exp.fwi; { fwi = with exp.fwi; {
test = stdPlot ./fwi/test.R [ test ];
strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ];
strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ];
granularity = stdPlot ./fwi/granularity.R [ granularity ]; granularity = stdPlot ./fwi/granularity.R [ granularity ];
strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin ];
#strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ];
data_reuse = stdPlot ./fwi/granularity.R [ data_reuse ];
strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ];
sync_io = stdPlot ./fwi/strong_scaling_io.R [ sync_io ];
}; };
osu = with exp.osu; { osu = with exp.osu; {