From 989f6ee018a9aee8928fc270e10094ae6f99b3ce Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 7 Apr 2021 12:35:44 +0200 Subject: [PATCH] fwi: adjust input size to meet timing constraints The previous input sizes for both granularity and strong scaling tests were too big to meet the timing constraints needed for garlic. This patch sets a new, smaller, input size. Also, a minor cleanup is applied to the rest of the fwi experiments and figures. --- garlic/apps/fwi/default.nix | 1 + garlic/exp/fwi/data_reuse.nix | 42 +++---- garlic/exp/fwi/granularity.nix | 14 ++- garlic/exp/fwi/memory_affinity.nix | 138 --------------------- garlic/exp/fwi/strong_scaling_forkjoin.nix | 12 +- garlic/exp/fwi/strong_scaling_io.nix | 7 ++ garlic/exp/fwi/strong_scaling_mpionly.nix | 17 ++- garlic/exp/fwi/strong_scaling_task.nix | 12 +- garlic/exp/fwi/{test.nix => sync_io.nix} | 26 ++-- garlic/exp/index.nix | 5 +- garlic/fig/fwi/granularity.R | 31 ++--- garlic/fig/fwi/strong_scaling.R | 4 +- garlic/fig/fwi/test.R | 46 ------- garlic/fig/index.nix | 8 +- 14 files changed, 96 insertions(+), 267 deletions(-) delete mode 100644 garlic/exp/fwi/memory_affinity.nix rename garlic/exp/fwi/{test.nix => sync_io.nix} (84%) delete mode 100644 garlic/fig/fwi/test.R diff --git a/garlic/apps/fwi/default.nix b/garlic/apps/fwi/default.nix index 013fcef..5b1a9d6 100644 --- a/garlic/apps/fwi/default.nix +++ b/garlic/apps/fwi/default.nix @@ -34,6 +34,7 @@ stdenv.mkDerivation rec { # FIXME: Allow multiple MPI implementations postPatch = '' sed -i 's/= OPENMPI$/= INTEL/g' Makefile + sed -i 's/USE_O_DIRECT ?= NO/USE_O_DIRECT ?= YES/g' Makefile || true ''; # FIXME: This is an ugly hack. 
diff --git a/garlic/exp/fwi/data_reuse.nix b/garlic/exp/fwi/data_reuse.nix index 623492a..84f0c4b 100644 --- a/garlic/exp/fwi/data_reuse.nix +++ b/garlic/exp/fwi/data_reuse.nix @@ -1,3 +1,23 @@ +# This test compares a FWI version using poor data locality (+NOREUSE) versus +# the optimized version (used for all other experiments). Below is a pseudocode +# snippet illustrating the fundamental difference between the two versions. +# +# NOREUSE +# ---------------------- +# for (y) for (x) for (z) +# computA(v[y][x][z]); +# for (y) for (x) for (z) +# computB(v[y][x][z]); +# for (y) for (x) for (z) +# computC(v[y][x][z]); +# +# Optimized version +# ---------------------- +# for (y) for (x) for (z) +# computA(v[y][x][z]); +# computB(v[y][x][z]); +# computC(v[y][x][z]); + { stdenv , stdexp @@ -15,34 +35,14 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" "garlic/mpi+send+oss+task" - "garlic/mpi+send+oss+task+noreuse" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" + "garlic/mpi+send+oss+task+NOREUSE" ]; blocksize = [ 1 2 4 8 ]; - #blocksize = [ 1 2 ]; n = [ -# {nx=50; ny=4000; nz=50;} -# {nx=20; ny=4000; nz=20;} -# {nx=300; ny=8000; nz=300;} # half node, / -# {nx=300; ny=1000; nz=300;} # half node, / -# {nx=200; ny=1000; nz=200;} # half node, not enough tasks -# {nx=200; ny=4000; nz=200;} # --/ half node -# {nx=250; ny=2000; nz=250;} # / half node {nx=300; ny=2000; nz=300;} # / half node -# {nx=100; ny=2000; nz=100;} # \-// half node -# {nx=150; ny=2000; nz=150;} # \-/ half node -# {nx=200; ny=64000; nz=200;} # --/ 16 nodes -# {nx=200; ny=4000; nz=200;} # --/ half node -# {nx=200; ny=8000; nz=200;} # --/ 1 node -# {nx=100; ny=8000; nz=100;} # --/ half node ]; }; diff --git a/garlic/exp/fwi/granularity.nix b/garlic/exp/fwi/granularity.nix index 1c8cac4..7773b3b 100644 --- a/garlic/exp/fwi/granularity.nix +++ b/garlic/exp/fwi/granularity.nix @@ -1,3 +1,5 @@ +# Regular 
granularity test for FWI + { stdenv , stdexp @@ -15,20 +17,20 @@ let # Initial variable configuration varConf = { gitBranch = [ - "garlic/tampi+send+oss+task" - "garlic/tampi+isend+oss+task" - "garlic/mpi+send+omp+task" - "garlic/mpi+send+oss+task" +# "garlic/tampi+send+oss+task" + "garlic/tampi+isend+oss+task" +# "garlic/mpi+send+omp+task" +# "garlic/mpi+send+oss+task" # "garlic/mpi+send+seq" # "garlic/oss+task" # "garlic/omp+task" # "garlic/seq" ]; - blocksize = [ 1 2 4 8 16 32 ]; + blocksize = [ 1 2 4 8 16 32 64 128 256 ]; n = [ - {nx=500; nz=500; ny=2000; ntpn=2; nn=1;} + {nx=100; nz=100; ny=8000; ntpn=2; nn=1;} ]; }; diff --git a/garlic/exp/fwi/memory_affinity.nix b/garlic/exp/fwi/memory_affinity.nix deleted file mode 100644 index 3df7226..0000000 --- a/garlic/exp/fwi/memory_affinity.nix +++ /dev/null @@ -1,138 +0,0 @@ -{ - stdenv -, stdexp -, bsc -, targetMachine -, stages -}: - -with stdenv.lib; - -let - - inherit (targetMachine) fs; - - # Initial variable configuration - varConf = { - gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" - "garlic/mpi+send+oss+task" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" - ]; - - blocksize = [ 1 ]; - - n = [ -# {nx=500; nz=500; ny=8000;} - {nx=500; nz=500; ny=2000;} - ]; - - nodes = [ 1 ] - - numactl = [ true false ] - - }; - -# The c value contains something like: -# { -# n = { nx=500; ny=500; nz=500; } -# blocksize = 1; -# gitBranch = "garlic/tampi+send+oss+task"; -# } - - machineConfig = targetMachine.config; - - # Generate the complete configuration for each unit - genConf = with bsc; c: targetMachine.config // rec { - expName = "fwi"; - unitName = "${expName}-test"; - inherit (machineConfig) hw; - - cc = icc; - inherit (c) gitBranch blocksize; - useNumactl = c.numactl - - #nx = c.n.nx; - #ny = c.n.ny; - #nz = c.n.nz; - - # Same but shorter: - inherit (c.n) nx ny nz; - - fwiInput = bsc.apps.fwi.input.override { - inherit (c.n) nx ny nz; - }; - - # Other 
FWI parameters - ioFreq = -1; - - # Repeat the execution of each unit several times - loops = 10; - #loops = 1; - - # Resources - cpusPerTask = if (useNumactl) then hw.cpusPerNode else hw.cpusPerSocket; - ntasksPerNode = hw.cpusPerNode / cpusPerTask; - nodes = c.nodes; - qos = "debug"; - time = "02:00:00"; - jobName = unitName; - - tracing = "no"; - - # Enable permissions to write in the local storage - extraMounts = [ fs.local.temp ]; - - }; - - # Compute the array of configurations - configs = stdexp.buildConfigs { - inherit varConf genConf; - }; - - exec = {nextStage, conf, ...}: stages.exec ({ - inherit nextStage; - pre = '' - CDIR=$PWD - if [[ "${conf.tracing}" == "yes" ]]; then - export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf" - fi - EXECDIR="${fs.local.temp}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN" - mkdir -p $EXECDIR - cd $EXECDIR - ln -fs ${conf.fwiInput}/InputModels InputModels || true - ''; - argv = [ - "${conf.fwiInput}/fwi_params.txt" - "${conf.fwiInput}/fwi_frequencies.txt" - conf.blocksize - "-1" # Fordward steps - "-1" # Backward steps - conf.ioFreq # Write/read frequency - ]; - post = '' - rm -rf Results || true - if [[ "${conf.tracing}" == "yes" ]]; then - mv trace_* $CDIR - fi - ''; - } // optionalAttrs (conf.useNumact) { - program = "${numactl}/bin/numactl --interleave=all ${stageProgram nextStage}"; - }); - - apps = bsc.garlic.apps; - - # FWI program - program = {nextStage, conf, ...}: apps.fwi.solver.override { - inherit (conf) cc gitBranch fwiInput; - }; - - pipeline = stdexp.stdPipeline ++ [ exec program ]; - -in - - stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/fwi/strong_scaling_forkjoin.nix b/garlic/exp/fwi/strong_scaling_forkjoin.nix index 8ff84c2..902eaf6 100644 --- a/garlic/exp/fwi/strong_scaling_forkjoin.nix +++ b/garlic/exp/fwi/strong_scaling_forkjoin.nix @@ -1,3 +1,6 @@ +# Strong scaling test for FWI variants based on forkjoint. This +# experiment does not rely on block sizes. 
+ { stdenv , stdexp @@ -15,20 +18,13 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" -# "garlic/mpi+send+oss+task" "garlic/mpi+send+omp+fork" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; blocksize = [ 0 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; nodes = [ 1 2 4 8 16 ]; diff --git a/garlic/exp/fwi/strong_scaling_io.nix b/garlic/exp/fwi/strong_scaling_io.nix index 30f0e75..c063659 100644 --- a/garlic/exp/fwi/strong_scaling_io.nix +++ b/garlic/exp/fwi/strong_scaling_io.nix @@ -1,3 +1,10 @@ +# Strong scaling test for FWI variants based on tasks with and without I/O. +# This experiment solves a computationally expensive input which brings the +# storage devices to saturation when I/O is enabled. The same input is run +# without I/O for comparison purposes. Also, the experiments are run for a +# range of block sizes deemed as efficient according to the granularity +# experiment. + { stdenv , stdexp diff --git a/garlic/exp/fwi/strong_scaling_mpionly.nix b/garlic/exp/fwi/strong_scaling_mpionly.nix index 48283cb..94ffd8d 100644 --- a/garlic/exp/fwi/strong_scaling_mpionly.nix +++ b/garlic/exp/fwi/strong_scaling_mpionly.nix @@ -1,3 +1,7 @@ +# Strong scaling test for FWI variants based exclusively on MPI. This +# experiment does not rely on block sizes. An MPI process is instantiated per +# core. 
+ { stdenv , stdexp @@ -15,24 +19,17 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" -# "garlic/mpi+send+oss+task" -# "garlic/mpi+send+omp+fork" "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; blocksize = [ 0 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; - # Not enough planes for 8 and 16 nodes - nodes = [ 1 2 4 ]; + # Not enough planes for 4, 8 and 16 nodes + nodes = [ 1 2 ]; }; diff --git a/garlic/exp/fwi/strong_scaling_task.nix b/garlic/exp/fwi/strong_scaling_task.nix index 048fe7e..4ad7833 100644 --- a/garlic/exp/fwi/strong_scaling_task.nix +++ b/garlic/exp/fwi/strong_scaling_task.nix @@ -1,3 +1,7 @@ +# Strong scaling test for FWI variants based on tasks. This +# experiment explores a range of block sizes deemed as efficient +# according to the granularity experiment. + { stdenv , stdexp @@ -19,16 +23,12 @@ let "garlic/tampi+isend+oss+task" "garlic/mpi+send+omp+task" "garlic/mpi+send+oss+task" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; - blocksize = [ 1 2 4 8 ]; + blocksize = [ 1 2 4 8 16 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; nodes = [ 1 2 4 8 16 ]; diff --git a/garlic/exp/fwi/test.nix b/garlic/exp/fwi/sync_io.nix similarity index 84% rename from garlic/exp/fwi/test.nix rename to garlic/exp/fwi/sync_io.nix index a9ea312..59e791d 100644 --- a/garlic/exp/fwi/test.nix +++ b/garlic/exp/fwi/sync_io.nix @@ -1,3 +1,7 @@ +# This experiment compares the effect of not using I/O versus using O_DIRECT | +# O_DSYNC enabled I/O. This is a reduced version of the strong_scaling_io +# experiment. 
+ { stdenv , stdexp @@ -15,8 +19,8 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" - "garlic/mpi+send+omp+task" + "garlic/tampi+send+oss+task" +# "garlic/mpi+send+omp+task" # "garlic/mpi+send+oss+task" # "garlic/mpi+send+seq" # "garlic/oss+task" @@ -24,14 +28,16 @@ let # "garlic/seq" ]; - blocksize = [ 1 2 4 8 16 32 ]; - #blocksize = [ 1 2 4 8 ]; + blocksize = [ 1 ]; n = [ - #{nx=500; nz=500; ny=1000; ntpn=1; nn=1;} - {nx=500; nz=500; ny=2000; ntpn=2; nn=1;} + {nx=500; nz=500; ny=16000;} ]; + nodes = [ 4 ]; + + ioFreq = [ 9999 (-1) ]; + }; # The c value contains something like: @@ -57,14 +63,14 @@ let #nz = c.n.nz; # Same but shorter: - inherit (c.n) nx ny nz ntpn nn; + inherit (c.n) nx ny nz; fwiInput = bsc.apps.fwi.input.override { inherit (c.n) nx ny nz; }; # Other FWI parameters - ioFreq = -1; + ioFreq = c.ioFreq; # Repeat the execution of each unit several times loops = 10; @@ -72,8 +78,8 @@ let # Resources cpusPerTask = hw.cpusPerSocket; - ntasksPerNode = ntpn; - nodes = nn; + ntasksPerNode = 2; + nodes = c.nodes; qos = "debug"; time = "02:00:00"; jobName = unitName; diff --git a/garlic/exp/index.nix b/garlic/exp/index.nix index c50cdea..f6a518d 100644 --- a/garlic/exp/index.nix +++ b/garlic/exp/index.nix @@ -98,12 +98,13 @@ }; fwi = { - test = callPackage ./fwi/test.nix { }; + granularity = callPackage ./fwi/granularity.nix { }; strong_scaling_task = callPackage ./fwi/strong_scaling_task.nix { }; strong_scaling_forkjoin = callPackage ./fwi/strong_scaling_forkjoin.nix { }; strong_scaling_mpionly = callPackage ./fwi/strong_scaling_mpionly.nix { }; + data_reuse = callPackage ./fwi/data_reuse.nix { }; strong_scaling_io = callPackage ./fwi/strong_scaling_io.nix { }; - granularity = callPackage ./fwi/granularity.nix { }; + sync_io = callPackage ./fwi/sync_io.nix { }; }; osu = rec { diff --git a/garlic/fig/fwi/granularity.R b/garlic/fig/fwi/granularity.R index a1fc99d..40d6f75 100644 --- 
a/garlic/fig/fwi/granularity.R +++ b/garlic/fig/fwi/granularity.R @@ -30,7 +30,7 @@ w=5 #################################################################### ### Line Graph #################################################################### -png("time.png", width=w*ppi, height=h*ppi, res=ppi) +png("mtime.png", width=w*ppi, height=h*ppi, res=ppi) ## Create the plot with the normalized time vs nblocks p = ggplot(df, aes(x = blocksize, y=mtime, group=gitBranch, color=gitBranch)) + @@ -49,22 +49,23 @@ print(p) dev.off() #################################################################### -### Boxplot +### Line Graph #################################################################### -png("box.png", width=w*ppi, height=h*ppi, res=ppi) -# Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=blocksize, y=time, group=gitBranch, colour=gitBranch)) + - # Labels - labs(x="Blocksize", y="Normalized time", - title=sprintf("FWI Time"), - subtitle=input_file) + - # Draw boxplots - geom_boxplot() + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - theme(legend.position = c(0.5, 0.88)) +png("time.png", width=w*ppi, height=h*ppi, res=ppi) + +## Create the plot with the normalized time vs nblocks +p = ggplot(df, aes(x = blocksize, y=time, group=gitBranch, color=gitBranch)) + + geom_point() + + geom_line() + + theme_bw() + + labs(x="Blocksize", y="Time (s)", title="FWI granularity", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + + theme(legend.position = c(0.5, 0.88)) + # Render the plot print(p) -## Save the png image + +# Save the png image dev.off() diff --git a/garlic/fig/fwi/strong_scaling.R b/garlic/fig/fwi/strong_scaling.R index 5dd4bb5..89d79f1 100644 --- a/garlic/fig/fwi/strong_scaling.R +++ b/garlic/fig/fwi/strong_scaling.R @@ -14,7 +14,7 @@ dataset = jsonlite::stream_in(file(input_file)) %>% jsonlite::flatten() # Select block size to display -useBlocksize = 1 +useBlocksize = 2 # We only need the nblocks and time df 
= select(dataset, config.blocksize, config.gitBranch, config.nodes, time) %>% @@ -59,7 +59,7 @@ print(p) dev.off() #################################################################### -### Line plot (timei x nodes) +### Line plot (time x nodes) #################################################################### png("nxtime.png", width=w*ppi, height=h*ppi, res=ppi) diff --git a/garlic/fig/fwi/test.R b/garlic/fig/fwi/test.R deleted file mode 100644 index ca79f0d..0000000 --- a/garlic/fig/fwi/test.R +++ /dev/null @@ -1,46 +0,0 @@ -library(ggplot2) -library(dplyr) -library(scales) -library(jsonlite) - -args=commandArgs(trailingOnly=TRUE) - -# Read the timetable from args[1] -input_file = "input.json" -if (length(args)>0) { input_file = args[1] } - -# Load the dataset in NDJSON format -dataset = jsonlite::stream_in(file(input_file)) %>% - jsonlite::flatten() - -# We only need the nblocks and time -df = select(dataset, config.blocksize, config.gitBranch, time) %>% - rename(blocksize=config.blocksize, gitBranch=config.gitBranch) %>% - group_by(blocksize, gitBranch) %>% - mutate(mtime = median(time)) %>% - ungroup() - -df$gitBranch = as.factor(df$gitBranch) -df$blocksize = as.factor(df$blocksize) - -ppi=300 -h=5 -w=5 - -png("time.png", width=w*ppi, height=h*ppi, res=ppi) -# -## Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=blocksize, y=time)) + - geom_point() + - geom_line(aes(y=mtime, group=gitBranch, color=gitBranch)) + - theme_bw() + - labs(x="Blocksize", y="Time (s)", title="FWI granularity", - subtitle=input_file) + - theme(plot.subtitle=element_text(size=8)) + - theme(legend.position = c(0.5, 0.88)) - -# Render the plot -print(p) - -# Save the png image -dev.off() diff --git a/garlic/fig/index.nix b/garlic/fig/index.nix index 5ca637f..b3f77ea 100644 --- a/garlic/fig/index.nix +++ b/garlic/fig/index.nix @@ -62,10 +62,12 @@ in }; fwi = with exp.fwi; { - test = stdPlot ./fwi/test.R [ test ]; - strong_scaling = stdPlot ./fwi/strong_scaling.R 
[ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ]; - strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ]; granularity = stdPlot ./fwi/granularity.R [ granularity ]; + strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin ]; + #strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ]; + data_reuse = stdPlot ./fwi/granularity.R [ data_reuse ]; + strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ]; + sync_io = stdPlot ./fwi/strong_scaling_io.R [ sync_io ]; }; osu = with exp.osu; {