diff --git a/garlic/apps/fwi/default.nix b/garlic/apps/fwi/default.nix index 013fcef..5b1a9d6 100644 --- a/garlic/apps/fwi/default.nix +++ b/garlic/apps/fwi/default.nix @@ -34,6 +34,7 @@ stdenv.mkDerivation rec { # FIXME: Allow multiple MPI implementations postPatch = '' sed -i 's/= OPENMPI$/= INTEL/g' Makefile + sed -i 's/USE_O_DIRECT ?= NO/USE_O_DIRECT ?= YES/g' Makefile || true ''; # FIXME: This is an ugly hack. diff --git a/garlic/exp/fwi/data_reuse.nix b/garlic/exp/fwi/data_reuse.nix index 623492a..84f0c4b 100644 --- a/garlic/exp/fwi/data_reuse.nix +++ b/garlic/exp/fwi/data_reuse.nix @@ -1,3 +1,23 @@ +# This test compares an FWI version using poor data locality (+NOREUSE) versus +# the optimized version (used for all other experiments). The pseudocode +# snippet below illustrates the fundamental difference between the two versions. +# +# NOREUSE +# ---------------------- +# for (y) for (x) for (z) +# computA(v[y][x][z]); +# for (y) for (x) for (z) +# computB(v[y][x][z]); +# for (y) for (x) for (z) +# computC(v[y][x][z]); +# +# Optimized version +# ---------------------- +# for (y) for (x) for (z) +# computA(v[y][x][z]); +# computB(v[y][x][z]); +# computC(v[y][x][z]); + { stdenv , stdexp @@ -15,34 +35,14 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" "garlic/mpi+send+oss+task" - "garlic/mpi+send+oss+task+noreuse" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" + "garlic/mpi+send+oss+task+NOREUSE" ]; blocksize = [ 1 2 4 8 ]; - #blocksize = [ 1 2 ]; n = [ -# {nx=50; ny=4000; nz=50;} -# {nx=20; ny=4000; nz=20;} -# {nx=300; ny=8000; nz=300;} # half node, / -# {nx=300; ny=1000; nz=300;} # half node, / -# {nx=200; ny=1000; nz=200;} # half node, not enough tasks -# {nx=200; ny=4000; nz=200;} # --/ half node -# {nx=250; ny=2000; nz=250;} # / half node {nx=300; ny=2000; nz=300;} # / half node -# {nx=100; ny=2000; nz=100;} # \-// half node -# {nx=150; ny=2000; 
nz=150;} # \-/ half node -# {nx=200; ny=64000; nz=200;} # --/ 16 nodes -# {nx=200; ny=4000; nz=200;} # --/ half node -# {nx=200; ny=8000; nz=200;} # --/ 1 node -# {nx=100; ny=8000; nz=100;} # --/ half node ]; }; diff --git a/garlic/exp/fwi/granularity.nix b/garlic/exp/fwi/granularity.nix index 1c8cac4..7773b3b 100644 --- a/garlic/exp/fwi/granularity.nix +++ b/garlic/exp/fwi/granularity.nix @@ -1,3 +1,5 @@ +# Regular granularity test for FWI + { stdenv , stdexp @@ -15,20 +17,20 @@ let # Initial variable configuration varConf = { gitBranch = [ - "garlic/tampi+send+oss+task" - "garlic/tampi+isend+oss+task" - "garlic/mpi+send+omp+task" - "garlic/mpi+send+oss+task" +# "garlic/tampi+send+oss+task" + "garlic/tampi+isend+oss+task" +# "garlic/mpi+send+omp+task" +# "garlic/mpi+send+oss+task" # "garlic/mpi+send+seq" # "garlic/oss+task" # "garlic/omp+task" # "garlic/seq" ]; - blocksize = [ 1 2 4 8 16 32 ]; + blocksize = [ 1 2 4 8 16 32 64 128 256 ]; n = [ - {nx=500; nz=500; ny=2000; ntpn=2; nn=1;} + {nx=100; nz=100; ny=8000; ntpn=2; nn=1;} ]; }; diff --git a/garlic/exp/fwi/memory_affinity.nix b/garlic/exp/fwi/memory_affinity.nix deleted file mode 100644 index 3df7226..0000000 --- a/garlic/exp/fwi/memory_affinity.nix +++ /dev/null @@ -1,138 +0,0 @@ -{ - stdenv -, stdexp -, bsc -, targetMachine -, stages -}: - -with stdenv.lib; - -let - - inherit (targetMachine) fs; - - # Initial variable configuration - varConf = { - gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" - "garlic/mpi+send+oss+task" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" - ]; - - blocksize = [ 1 ]; - - n = [ -# {nx=500; nz=500; ny=8000;} - {nx=500; nz=500; ny=2000;} - ]; - - nodes = [ 1 ] - - numactl = [ true false ] - - }; - -# The c value contains something like: -# { -# n = { nx=500; ny=500; nz=500; } -# blocksize = 1; -# gitBranch = "garlic/tampi+send+oss+task"; -# } - - machineConfig = targetMachine.config; - - # Generate the complete 
configuration for each unit - genConf = with bsc; c: targetMachine.config // rec { - expName = "fwi"; - unitName = "${expName}-test"; - inherit (machineConfig) hw; - - cc = icc; - inherit (c) gitBranch blocksize; - useNumactl = c.numactl - - #nx = c.n.nx; - #ny = c.n.ny; - #nz = c.n.nz; - - # Same but shorter: - inherit (c.n) nx ny nz; - - fwiInput = bsc.apps.fwi.input.override { - inherit (c.n) nx ny nz; - }; - - # Other FWI parameters - ioFreq = -1; - - # Repeat the execution of each unit several times - loops = 10; - #loops = 1; - - # Resources - cpusPerTask = if (useNumactl) then hw.cpusPerNode else hw.cpusPerSocket; - ntasksPerNode = hw.cpusPerNode / cpusPerTask; - nodes = c.nodes; - qos = "debug"; - time = "02:00:00"; - jobName = unitName; - - tracing = "no"; - - # Enable permissions to write in the local storage - extraMounts = [ fs.local.temp ]; - - }; - - # Compute the array of configurations - configs = stdexp.buildConfigs { - inherit varConf genConf; - }; - - exec = {nextStage, conf, ...}: stages.exec ({ - inherit nextStage; - pre = '' - CDIR=$PWD - if [[ "${conf.tracing}" == "yes" ]]; then - export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf" - fi - EXECDIR="${fs.local.temp}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN" - mkdir -p $EXECDIR - cd $EXECDIR - ln -fs ${conf.fwiInput}/InputModels InputModels || true - ''; - argv = [ - "${conf.fwiInput}/fwi_params.txt" - "${conf.fwiInput}/fwi_frequencies.txt" - conf.blocksize - "-1" # Fordward steps - "-1" # Backward steps - conf.ioFreq # Write/read frequency - ]; - post = '' - rm -rf Results || true - if [[ "${conf.tracing}" == "yes" ]]; then - mv trace_* $CDIR - fi - ''; - } // optionalAttrs (conf.useNumact) { - program = "${numactl}/bin/numactl --interleave=all ${stageProgram nextStage}"; - }); - - apps = bsc.garlic.apps; - - # FWI program - program = {nextStage, conf, ...}: apps.fwi.solver.override { - inherit (conf) cc gitBranch fwiInput; - }; - - pipeline = stdexp.stdPipeline ++ [ exec program ]; - -in - 
- stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/fwi/strong_scaling_forkjoin.nix b/garlic/exp/fwi/strong_scaling_forkjoin.nix index 8ff84c2..902eaf6 100644 --- a/garlic/exp/fwi/strong_scaling_forkjoin.nix +++ b/garlic/exp/fwi/strong_scaling_forkjoin.nix @@ -1,3 +1,6 @@ +# Strong scaling test for FWI variants based on fork-join. This +# experiment does not rely on block sizes. + { stdenv , stdexp @@ -15,20 +18,13 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" -# "garlic/mpi+send+oss+task" "garlic/mpi+send+omp+fork" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; blocksize = [ 0 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; nodes = [ 1 2 4 8 16 ]; diff --git a/garlic/exp/fwi/strong_scaling_io.nix b/garlic/exp/fwi/strong_scaling_io.nix index 30f0e75..c063659 100644 --- a/garlic/exp/fwi/strong_scaling_io.nix +++ b/garlic/exp/fwi/strong_scaling_io.nix @@ -1,3 +1,10 @@ +# Strong scaling test for FWI variants based on tasks with and without I/O. +# This experiment solves a computationally expensive input which brings the +# storage devices to saturation when I/O is enabled. The same input is run +# without I/O for comparison purposes. Also, the experiments are run for a +# range of block sizes deemed as efficient according to the granularity +# experiment. + { stdenv , stdexp diff --git a/garlic/exp/fwi/strong_scaling_mpionly.nix b/garlic/exp/fwi/strong_scaling_mpionly.nix index 48283cb..94ffd8d 100644 --- a/garlic/exp/fwi/strong_scaling_mpionly.nix +++ b/garlic/exp/fwi/strong_scaling_mpionly.nix @@ -1,3 +1,7 @@ +# Strong scaling test for FWI variants based exclusively on MPI. This +# experiment does not rely on block sizes. An MPI process is instantiated per +# core. 
+ { stdenv , stdexp @@ -15,24 +19,17 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" -# "garlic/mpi+send+omp+task" -# "garlic/mpi+send+oss+task" -# "garlic/mpi+send+omp+fork" "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; blocksize = [ 0 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; - # Not enough planes for 8 and 16 nodes - nodes = [ 1 2 4 ]; + # Not enough planes for 4, 8 and 16 nodes + nodes = [ 1 2 ]; }; diff --git a/garlic/exp/fwi/strong_scaling_task.nix b/garlic/exp/fwi/strong_scaling_task.nix index 048fe7e..4ad7833 100644 --- a/garlic/exp/fwi/strong_scaling_task.nix +++ b/garlic/exp/fwi/strong_scaling_task.nix @@ -1,3 +1,7 @@ +# Strong scaling test for FWI variants based on tasks. This +# experiment explores a range of block sizes deemed as efficient +# according to the granularity experiment. + { stdenv , stdexp @@ -19,16 +23,12 @@ let "garlic/tampi+isend+oss+task" "garlic/mpi+send+omp+task" "garlic/mpi+send+oss+task" -# "garlic/mpi+send+seq" -# "garlic/oss+task" -# "garlic/omp+task" -# "garlic/seq" ]; - blocksize = [ 1 2 4 8 ]; + blocksize = [ 1 2 4 8 16 ]; n = [ - {nx=500; nz=500; ny=16000;} + {nx=100; nz=100; ny=8000;} ]; nodes = [ 1 2 4 8 16 ]; diff --git a/garlic/exp/fwi/test.nix b/garlic/exp/fwi/sync_io.nix similarity index 84% rename from garlic/exp/fwi/test.nix rename to garlic/exp/fwi/sync_io.nix index a9ea312..59e791d 100644 --- a/garlic/exp/fwi/test.nix +++ b/garlic/exp/fwi/sync_io.nix @@ -1,3 +1,7 @@ +# This experiment compares the effect of not using I/O versus using O_DIRECT | +# O_DSYNC enabled I/O. This is a reduced version of the strong_scaling_io +# experiment. 
+ { stdenv , stdexp @@ -15,8 +19,8 @@ let # Initial variable configuration varConf = { gitBranch = [ -# "garlic/tampi+send+oss+task" - "garlic/mpi+send+omp+task" + "garlic/tampi+send+oss+task" +# "garlic/mpi+send+omp+task" # "garlic/mpi+send+oss+task" # "garlic/mpi+send+seq" # "garlic/oss+task" @@ -24,14 +28,16 @@ let # "garlic/seq" ]; - blocksize = [ 1 2 4 8 16 32 ]; - #blocksize = [ 1 2 4 8 ]; + blocksize = [ 1 ]; n = [ - #{nx=500; nz=500; ny=1000; ntpn=1; nn=1;} - {nx=500; nz=500; ny=2000; ntpn=2; nn=1;} + {nx=500; nz=500; ny=16000;} ]; + nodes = [ 4 ]; + + ioFreq = [ 9999 (-1) ]; + }; # The c value contains something like: @@ -57,14 +63,14 @@ let #nz = c.n.nz; # Same but shorter: - inherit (c.n) nx ny nz ntpn nn; + inherit (c.n) nx ny nz; fwiInput = bsc.apps.fwi.input.override { inherit (c.n) nx ny nz; }; # Other FWI parameters - ioFreq = -1; + ioFreq = c.ioFreq; # Repeat the execution of each unit several times loops = 10; @@ -72,8 +78,8 @@ let # Resources cpusPerTask = hw.cpusPerSocket; - ntasksPerNode = ntpn; - nodes = nn; + ntasksPerNode = 2; + nodes = c.nodes; qos = "debug"; time = "02:00:00"; jobName = unitName; diff --git a/garlic/exp/index.nix b/garlic/exp/index.nix index c50cdea..f6a518d 100644 --- a/garlic/exp/index.nix +++ b/garlic/exp/index.nix @@ -98,12 +98,13 @@ }; fwi = { - test = callPackage ./fwi/test.nix { }; + granularity = callPackage ./fwi/granularity.nix { }; strong_scaling_task = callPackage ./fwi/strong_scaling_task.nix { }; strong_scaling_forkjoin = callPackage ./fwi/strong_scaling_forkjoin.nix { }; strong_scaling_mpionly = callPackage ./fwi/strong_scaling_mpionly.nix { }; + data_reuse = callPackage ./fwi/data_reuse.nix { }; strong_scaling_io = callPackage ./fwi/strong_scaling_io.nix { }; - granularity = callPackage ./fwi/granularity.nix { }; + sync_io = callPackage ./fwi/sync_io.nix { }; }; osu = rec { diff --git a/garlic/fig/fwi/granularity.R b/garlic/fig/fwi/granularity.R index a1fc99d..40d6f75 100644 --- 
a/garlic/fig/fwi/granularity.R +++ b/garlic/fig/fwi/granularity.R @@ -30,7 +30,7 @@ w=5 #################################################################### ### Line Graph #################################################################### -png("time.png", width=w*ppi, height=h*ppi, res=ppi) +png("mtime.png", width=w*ppi, height=h*ppi, res=ppi) ## Create the plot with the normalized time vs nblocks p = ggplot(df, aes(x = blocksize, y=mtime, group=gitBranch, color=gitBranch)) + @@ -49,22 +49,23 @@ print(p) dev.off() #################################################################### -### Boxplot +### Line Graph #################################################################### -png("box.png", width=w*ppi, height=h*ppi, res=ppi) -# Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=blocksize, y=time, group=gitBranch, colour=gitBranch)) + - # Labels - labs(x="Blocksize", y="Normalized time", - title=sprintf("FWI Time"), - subtitle=input_file) + - # Draw boxplots - geom_boxplot() + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - theme(legend.position = c(0.5, 0.88)) +png("time.png", width=w*ppi, height=h*ppi, res=ppi) + +## Create the plot with the normalized time vs nblocks +p = ggplot(df, aes(x = blocksize, y=time, group=gitBranch, color=gitBranch)) + + geom_point() + + geom_line() + + theme_bw() + + labs(x="Blocksize", y="Time (s)", title="FWI granularity", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + + theme(legend.position = c(0.5, 0.88)) + # Render the plot print(p) -## Save the png image + +# Save the png image dev.off() diff --git a/garlic/fig/fwi/strong_scaling.R b/garlic/fig/fwi/strong_scaling.R index 5dd4bb5..89d79f1 100644 --- a/garlic/fig/fwi/strong_scaling.R +++ b/garlic/fig/fwi/strong_scaling.R @@ -14,7 +14,7 @@ dataset = jsonlite::stream_in(file(input_file)) %>% jsonlite::flatten() # Select block size to display -useBlocksize = 1 +useBlocksize = 2 # We only need the nblocks and time df 
= select(dataset, config.blocksize, config.gitBranch, config.nodes, time) %>% @@ -59,7 +59,7 @@ print(p) dev.off() #################################################################### -### Line plot (timei x nodes) +### Line plot (time x nodes) #################################################################### png("nxtime.png", width=w*ppi, height=h*ppi, res=ppi) diff --git a/garlic/fig/fwi/test.R b/garlic/fig/fwi/test.R deleted file mode 100644 index ca79f0d..0000000 --- a/garlic/fig/fwi/test.R +++ /dev/null @@ -1,46 +0,0 @@ -library(ggplot2) -library(dplyr) -library(scales) -library(jsonlite) - -args=commandArgs(trailingOnly=TRUE) - -# Read the timetable from args[1] -input_file = "input.json" -if (length(args)>0) { input_file = args[1] } - -# Load the dataset in NDJSON format -dataset = jsonlite::stream_in(file(input_file)) %>% - jsonlite::flatten() - -# We only need the nblocks and time -df = select(dataset, config.blocksize, config.gitBranch, time) %>% - rename(blocksize=config.blocksize, gitBranch=config.gitBranch) %>% - group_by(blocksize, gitBranch) %>% - mutate(mtime = median(time)) %>% - ungroup() - -df$gitBranch = as.factor(df$gitBranch) -df$blocksize = as.factor(df$blocksize) - -ppi=300 -h=5 -w=5 - -png("time.png", width=w*ppi, height=h*ppi, res=ppi) -# -## Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=blocksize, y=time)) + - geom_point() + - geom_line(aes(y=mtime, group=gitBranch, color=gitBranch)) + - theme_bw() + - labs(x="Blocksize", y="Time (s)", title="FWI granularity", - subtitle=input_file) + - theme(plot.subtitle=element_text(size=8)) + - theme(legend.position = c(0.5, 0.88)) - -# Render the plot -print(p) - -# Save the png image -dev.off() diff --git a/garlic/fig/index.nix b/garlic/fig/index.nix index 5ca637f..b3f77ea 100644 --- a/garlic/fig/index.nix +++ b/garlic/fig/index.nix @@ -62,10 +62,12 @@ in }; fwi = with exp.fwi; { - test = stdPlot ./fwi/test.R [ test ]; - strong_scaling = stdPlot ./fwi/strong_scaling.R 
[ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ]; - strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ]; granularity = stdPlot ./fwi/granularity.R [ granularity ]; + strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin ]; + #strong_scaling = stdPlot ./fwi/strong_scaling.R [ strong_scaling_task strong_scaling_forkjoin strong_scaling_mpionly ]; + data_reuse = stdPlot ./fwi/granularity.R [ data_reuse ]; + strong_scaling_io = stdPlot ./fwi/strong_scaling_io.R [ strong_scaling_io ]; + sync_io = stdPlot ./fwi/strong_scaling_io.R [ sync_io ]; }; osu = with exp.osu; {