Add dataset attrset in garlic

Modify nbody to evenly distribute blocks per cpu
This commit is contained in:
Rodrigo Arias 2020-10-23 10:19:37 +02:00
parent 06c29b573f
commit 8ce88ef046
5 changed files with 66 additions and 48 deletions

View File

@ -8,7 +8,7 @@
# Leave the first CPU per socket unused? # Leave the first CPU per socket unused?
, freeCpu ? false , freeCpu ? false
, particles ? 1024 * 32 , particles ? 4096 * 24
}: }:
with stdenv.lib; with stdenv.lib;
@ -16,8 +16,7 @@ with stdenv.lib;
let let
# Initial variable configuration # Initial variable configuration
varConf = with bsc; { varConf = with bsc; {
# We need at least cpusPerNode blocks nblocks = [ 12 24 48 96 192 384 768 ];
nblocks = [ 4 8 16 32 64 128 256 512 ];
}; };
machineConfig = targetMachine.config; machineConfig = targetMachine.config;

View File

@ -6,29 +6,34 @@ library(jsonlite)
args=commandArgs(trailingOnly=TRUE) args=commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1] # Read the timetable from args[1]
input_file = "timetable.json.gz" input_file = "input.json"
if (length(args)>0) { input_file = args[1] } if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format # Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>% dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten() jsonlite::flatten()
# We only need the cpu bind, blocksize and time particles = unique(dataset$config.particles)
df = select(dataset, config.freeCpu, config.blocksize, time) %>%
rename(blocksize=config.blocksize, freeCpu=config.freeCpu) # We only need the cpu bind, nblocks and time
df = select(dataset, config.freeCpu, config.nblocks, config.hw.cpusPerSocket, time) %>%
rename(nblocks=config.nblocks,
freeCpu=config.freeCpu,
cpusPerSocket=config.hw.cpusPerSocket)
df = df %>% mutate(blocksPerCpu = nblocks / cpusPerSocket)
# Use the blocksize as factor
df$blocksize = as.factor(df$blocksize)
df$freeCpu = as.factor(df$freeCpu) df$freeCpu = as.factor(df$freeCpu)
df$nblocks = as.factor(df$nblocks)
df$blocksPerCpuFactor = as.factor(df$blocksPerCpu)
# Split by freeCpu variant # Split by freeCpu variant
D=df %>% group_by(freeCpu, blocksize) %>% D=df %>% group_by(freeCpu, nblocks) %>%
mutate(tnorm = time / median(time) - 1) mutate(tnorm = time / median(time) - 1)
bs_unique = unique(df$blocksize) bs_unique = unique(df$nblocks)
nbs=length(bs_unique) nbs=length(bs_unique)
print(D) print(D)
ppi=300 ppi=300
@ -39,12 +44,12 @@ png("box.png", width=w*ppi, height=h*ppi, res=ppi)
# #
# #
# #
# Create the plot with the normalized time vs blocksize # Create the plot with the normalized time vs blocks per CPU
p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + p = ggplot(data=D, aes(x=blocksPerCpuFactor, y=tnorm)) +
# Labels # Labels
labs(x="Block size", y="Normalized time", labs(x="Blocks/CPU", y="Normalized time",
title="Nbody normalized time", title=sprintf("Nbody normalized time. Particles=%d", particles),
subtitle=input_file) + subtitle=input_file) +
# Center the title # Center the title
@ -85,14 +90,15 @@ dev.off()
# #
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
# #
## Create the plot with the time vs blocksize ## Create the plot with the time vs blocks per CPU
p = ggplot(D, aes(x=blocksize, y=time, color=freeCpu)) + p = ggplot(D, aes(x=blocksPerCpuFactor, y=time, color=freeCpu)) +
labs(x="Block size", y="Time (s)", labs(x="Blocks/CPU", y="Time (s)",
title="Nbody granularity", title=sprintf("Nbody granularity. Particles=%d", particles),
subtitle=input_file) + subtitle=input_file) +
theme_bw() + theme_bw() +
theme(plot.subtitle=element_text(size=8)) + theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88)) +
geom_point(shape=21, size=3) + geom_point(shape=21, size=3) +
#scale_x_continuous(trans=log2_trans()) + #scale_x_continuous(trans=log2_trans()) +

View File

@ -6,30 +6,36 @@ library(jsonlite)
args=commandArgs(trailingOnly=TRUE) args=commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1] # Read the timetable from args[1]
input_file = "timetable.json.gz" input_file = "input.json"
if (length(args)>0) { input_file = args[1] } if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format # Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>% dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten() jsonlite::flatten()
# We only need the cpu bind, blocksize and time particles = unique(dataset$config.particles)
df = select(dataset, config.enableJemalloc, config.blocksize, time) %>%
rename(blocksize=config.blocksize, # We only need the cpu bind, nblocks and time
jemalloc=config.enableJemalloc) df = select(dataset, config.enableJemalloc, config.nblocks, config.hw.cpusPerSocket, time) %>%
rename(nblocks=config.nblocks,
jemalloc=config.enableJemalloc,
cpusPerSocket=config.hw.cpusPerSocket)
df = df %>% mutate(blocksPerCpu = nblocks / cpusPerSocket)
# Use the blocksize as factor
df$blocksize = as.factor(df$blocksize)
df$jemalloc = as.factor(df$jemalloc) df$jemalloc = as.factor(df$jemalloc)
df$nblocks = as.factor(df$nblocks)
df$blocksPerCpuFactor = as.factor(df$blocksPerCpu)
# Split by malloc variant # Split by malloc variant
D=df %>% group_by(jemalloc, blocksize) %>% D=df %>% group_by(jemalloc, nblocks) %>%
mutate(tnorm = time / median(time) - 1) mutate(tnorm = time / median(time) - 1)
# Add another column: blocksPerCpu (we assume one task per socket, using
# all CPUs)
bs_unique = unique(df$blocksize) bs_unique = unique(df$nblocks)
nbs=length(bs_unique) nbs=length(bs_unique)
print(D) print(D)
ppi=300 ppi=300
@ -40,12 +46,12 @@ png("box.png", width=w*ppi, height=h*ppi, res=ppi)
# #
# #
# #
# Create the plot with the normalized time vs blocksize # Create the plot with the normalized time vs nblocks
p = ggplot(data=D, aes(x=blocksize, y=tnorm)) + p = ggplot(data=D, aes(x=nblocks, y=tnorm)) +
# Labels # Labels
labs(x="Block size", y="Normalized time", labs(x="Num blocks", y="Normalized time",
title="Nbody normalized time", title=sprintf("Nbody normalized time. Particles=%d", particles),
subtitle=input_file) + subtitle=input_file) +
# Center the title # Center the title
@ -62,8 +68,7 @@ p = ggplot(data=D, aes(x=blocksize, y=tnorm)) +
geom_boxplot(aes(fill=jemalloc)) + geom_boxplot(aes(fill=jemalloc)) +
# # Use log2 scale in x # # Use log2 scale in x
# scale_x_continuous(trans=log2_trans(), # scale_x_continuous(trans=log2_trans()) +
# breaks=bs_unique) +
# #
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
@ -86,17 +91,18 @@ dev.off()
# #
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
# #
## Create the plot with the time vs blocksize ## Create the plot with the time vs blocks per CPU
p = ggplot(D, aes(x=blocksize, y=time, color=jemalloc)) + p = ggplot(D, aes(x=blocksPerCpu, y=time, color=jemalloc)) +
labs(x="Block size", y="Time (s)", labs(x="Blocks/CPU", y="Time (s)",
title="Nbody granularity", title=sprintf("Nbody granularity. Particles=%d", particles),
subtitle=input_file) + subtitle=input_file) +
theme_bw() + theme_bw() +
theme(plot.subtitle=element_text(size=8)) + theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88)) +
geom_point(shape=21, size=3) + geom_point(shape=21, size=3) +
#scale_x_continuous(trans=log2_trans()) + scale_x_continuous(trans=log2_trans()) +
scale_y_continuous(trans=log2_trans()) scale_y_continuous(trans=log2_trans())
# Render the plot # Render the plot

View File

@ -20,8 +20,7 @@ stdenv.mkDerivation {
conf=garlic_config.json conf=garlic_config.json
for run in $(ls -d [0-9]* | sort -n); do for run in $(ls -d [0-9]* | sort -n); do
time=$(awk '/^time /{print $2}' $run/stdout.log) time=$(awk '/^time /{print $2}' $run/stdout.log)
jq -cn "{ exp:\"$exp\", unit:\"$unit\", config:inputs, time:$time}" \ jq -cn "{ exp:\"$exp\", unit:\"$unit\", config:inputs, time:$time, run:$run }" $conf >> $out
$conf >> $out
done done
done done
done done

View File

@ -284,7 +284,7 @@ let
tampi = callPackage ./garlic/exp/nbody/tampi.nix { }; tampi = callPackage ./garlic/exp/nbody/tampi.nix { };
# Experiment variants # Experiment variants
medium = tampi.override { particles = 64 * 1024; }; medium = tampi.override { particles = 24 * 4096; };
baseline = medium; baseline = medium;
freeCpu = baseline.override { freeCpu = true; }; freeCpu = baseline.override { freeCpu = true; };
jemalloc = baseline.override { enableJemalloc = true; }; jemalloc = baseline.override { enableJemalloc = true; };
@ -329,23 +329,31 @@ let
timetableFromTrebuchet = tre: timetable (resultFromTrebuchet tre); timetableFromTrebuchet = tre: timetable (resultFromTrebuchet tre);
mergeDatasets = callPackage ./garlic/pp/merge.nix { }; mergeDatasets = callPackage ./garlic/pp/merge.nix { };
# Takes a list of experiments and returns a file that contains the # Takes a list of experiments and returns a file that contains
# all timetable results from the experiments. # all timetable results from the experiments.
merge = exps: mergeDatasets (map timetableFromTrebuchet exps); merge = exps: mergeDatasets (map timetableFromTrebuchet exps);
}; };
# Datasets used in the figures
ds = with self.bsc.garlic; {
nbody = {
jemalloc = with exp.nbody; pp.merge [ baseline jemalloc ];
freeCpu = with exp.nbody; pp.merge [ baseline freeCpu ];
};
};
# Figures generated from the experiments # Figures generated from the experiments
fig = with self.bsc.garlic; { fig = with self.bsc.garlic; {
nbody = { nbody = {
jemalloc = pp.rPlot { jemalloc = pp.rPlot {
script = ./garlic/fig/nbody/jemalloc.R; script = ./garlic/fig/nbody/jemalloc.R;
dataset = with exp.nbody; pp.merge [ baseline jemalloc ]; dataset = ds.nbody.jemalloc;
}; };
freeCpu = pp.rPlot { freeCpu = pp.rPlot {
script = ./garlic/fig/nbody/freeCpu.R; script = ./garlic/fig/nbody/freeCpu.R;
dataset = with exp.nbody; pp.merge [ baseline freeCpu ]; dataset = ds.nbody.freeCpu;
}; };
}; };
@ -359,5 +367,5 @@ in
# Aliases # Aliases
garlic = bsc.garlic; garlic = bsc.garlic;
inherit (bsc.garlic) exp fig apps; inherit (bsc.garlic) exp fig apps ds;
} }