creams: update figures using one single pipeline

This commit is contained in:
Rodrigo Arias 2021-03-30 15:59:52 +02:00
parent 87f751185c
commit 76deac0a63
4 changed files with 212 additions and 166 deletions

View File

@ -1,71 +0,0 @@
library(ggplot2)
library(dplyr)
library(scales)
library(jsonlite)
# Arguments given after the script name on the command line.
args <- commandArgs(trailingOnly = TRUE)

# The timetable is read from the first argument when one is supplied;
# otherwise the script falls back to "input.json".
input_file <- if (length(args) > 0) args[1] else "input.json"
# Load the dataset in NDJSON format (one JSON record per line) and flatten
# the nested records so that the configuration fields become config.* columns
dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
  jsonlite::flatten()

# We only need some columns; drop the "config." prefix from their names
df = select(dataset, unit, config.nodes, config.gitBranch,
            config.granul, time, total_time) %>%
  rename(nodes=config.nodes, gitBranch=config.gitBranch,
         granul=config.granul)

df$unit = as.factor(df$unit)

# Keep a copy of the node count in nnodes before nodes is turned into a
# factor: the arithmetic further down uses nnodes, the plots use the factor
df$nnodes = df$nodes
df$nodes = as.factor(df$nodes)
df$gitBranch = as.factor(df$gitBranch)
df$granul = as.factor(df$granul)

# Remove the "garlic/" prefix from the gitBranch. The pattern is anchored so
# that only a leading "garlic/" is stripped; branch names without the prefix
# are left untouched instead of losing their first seven characters.
levels(df$gitBranch) <- sub("^garlic/", "", levels(df$gitBranch))
# Compute the derived columns, one grouping stage at a time. Expressions
# inside a single mutate() are evaluated in order, so each one may use the
# columns defined just above it.
D <- df %>%
  # Stage 1, per unit: deviation from the unit median, a flag for units
  # whose deviation reaches 1%, and the times scaled by the node count.
  group_by(unit) %>%
  mutate(
    tnorm = time / median(time) - 1,
    bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0),
    variability = ifelse(bad > 0, "large", "ok"),
    mtime = median(time),
    nmtime = mtime * nnodes,
    ntime = time * nnodes
  ) %>%
  ungroup() %>%
  # Stage 2, whole table: everything relative to the smallest
  # median-time * nodes value found in the dataset.
  mutate(
    min_nmtime = min(nmtime),
    rnmtime = nmtime / min_nmtime,
    rntime = ntime / min_nmtime,
    rmeff = 1.0 / rnmtime,
    reff = 1.0 / rntime
  ) %>%
  # Stage 3, per branch: speedup and efficiency relative to the largest
  # median time of that branch.
  group_by(gitBranch) %>%
  mutate(
    tmax = max(mtime),
    speedup = tmax / time,
    eff = speedup / nnodes,
    mspeedup = tmax / mtime,
    meff = mspeedup / nnodes
  ) %>%
  ungroup()

# Turn the two variability indicators into factors for plotting
D$bad <- as.factor(D$bad > 0)
D$variability <- as.factor(D$variability)
# Output geometry: pixels per inch, and height / base width in inches
ppi <- 300
h <- 5
w <- 5

# Median time against granularity, one line per (nodes, branch) pair, with
# the individual measurements drawn as points on a log2 time axis.
p <- ggplot(
  D,
  aes(x = granul, y = mtime, linetype = gitBranch, shape = nodes)
) +
  geom_line(aes(group = interaction(nodes, gitBranch))) +
  geom_point(aes(y = time)) +
  scale_y_continuous(trans = log2_trans()) +
  labs(
    x = "Granularity",
    y = "Time (s)",
    title = "Creams granularity",
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8))

# Render to a PNG that is 1.5 times wider than the base width
png("time.png", width = w * 1.5 * ppi, height = h * ppi, res = ppi)
print(p)
dev.off()

View File

@ -0,0 +1,97 @@
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
library(stringr)
args <- commandArgs(trailingOnly = TRUE)

# Use "input" as the dataset unless a path is given as the first argument
input_file <- "input"
if (length(args) > 0) {
  input_file <- args[1]
}
# Read the dataset (one JSON record per line), flatten the nested records
# and build the single table that every plot below draws from.
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
  jsonlite::flatten() %>%

  # Keep only the columns used below
  select(unit,
         config.nodes,
         config.gitBranch,
         config.granul,
         config.iterations,
         time,
         total_time) %>%

  # Drop the "config." prefix from the column names
  rename(nodes=config.nodes,
         gitBranch=config.gitBranch,
         granul=config.granul,
         iterations=config.iterations) %>%

  # Remove the "garlic/" prefix from the gitBranch. The pattern is a regular
  # expression, so it is anchored to strip only a leading "garlic/".
  mutate(branch = str_replace(gitBranch, "^garlic/", "")) %>%

  # Computations before converting to factor: iterations must still be
  # numeric for the division
  mutate(time.iter = time / iterations) %>%

  # Convert to factors (nodes itself stays as read; the factor version
  # goes into nodesFactor)
  mutate(unit = as.factor(unit)) %>%
  mutate(nodesFactor = as.factor(nodes)) %>%
  mutate(gitBranch = as.factor(gitBranch)) %>%
  mutate(granul = as.factor(granul)) %>%
  mutate(iterations = as.factor(iterations)) %>%

  # Compute median times per unit, and each run's relative deviation
  # from the median of its unit
  group_by(unit) %>%
  mutate(median.time = median(time)) %>%
  mutate(normalized.time = time / median.time - 1) %>%
  mutate(log.median.time = log(median.time)) %>%
  mutate(median.time.iter = median(time.iter)) %>%
  ungroup()
# Resolution and figure size (in inches) passed to every ggsave() call
dpi <- 300
h <- 6
w <- 6

# ---------------------------------------------------------------------

# Spread of the normalized time for each granularity, one panel per branch.
# The dashed red lines mark a deviation of +/-0.01 from the unit median.
p <- ggplot(df, aes(x = granul, y = normalized.time)) +
  geom_boxplot() +
  geom_hline(
    yintercept = c(-0.01, 0.01),
    linetype = "dashed",
    color = "red"
  ) +
  facet_wrap(branch ~ .) +
  labs(
    x = "granul",
    y = "Normalized time",
    title = "Creams granularity: normalized time",
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8))

# Write the same figure as PNG and as PDF
for (out in c("normalized.time.png", "normalized.time.pdf")) {
  ggsave(out, plot = p, width = w, height = h, dpi = dpi)
}
# ---------------------------------------------------------------------

# Every measured time as an open circle, with a line through the per-unit
# medians (one line per iterations value), one panel per branch.
p <- ggplot(df, aes(x = granul, y = time)) +
  geom_point(shape = 21, size = 3) +
  geom_line(aes(y = median.time, group = iterations)) +
  facet_wrap(branch ~ .) +
  labs(
    x = "granul",
    y = "Time (s)",
    title = "Creams granularity: time",
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8))

# Write the same figure as PNG and as PDF
for (out in c("time.png", "time.pdf")) {
  ggsave(out, plot = p, width = w, height = h, dpi = dpi)
}
# ---------------------------------------------------------------------

# Time divided by the iteration count, coloured by iterations: open circles
# for every run and a line through the per-unit medians, one panel per branch.
p <- ggplot(df, aes(x = granul, y = time.iter, color = iterations)) +
  geom_point(shape = 21, size = 3) +
  geom_line(aes(y = median.time.iter, group = iterations)) +
  facet_wrap(branch ~ .) +
  labs(
    x = "granul",
    y = "Time (s)",
    title = "Creams granularity: time / iterations",
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8))

# Write the same figure as PNG and as PDF
for (out in c("time.iter.png", "time.iter.pdf")) {
  ggsave(out, plot = p, width = w, height = h, dpi = dpi)
}

View File

@ -1,107 +1,127 @@
library(ggplot2)
library(dplyr)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
library(stringr)
args=commandArgs(trailingOnly=TRUE)
args = commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1]
input_file = "input.json"
if (length(args)>0) { input_file = args[1] }
# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
jsonlite::flatten()
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# We only need some columns
df = select(dataset, unit, config.nodes, config.gitBranch, time) %>%
rename(nodes=config.nodes, gitBranch=config.gitBranch)
jsonlite::flatten() %>%
df$unit = as.factor(df$unit)
df$nnodes = df$nodes
df$nodes = as.factor(df$nodes)
df$gitBranch = as.factor(df$gitBranch)
select(unit,
config.nodes,
config.gitBranch,
config.granul,
config.iterations,
time,
total_time) %>%
# Remove the "garlic/" prefix from the gitBranch
levels(df$gitBranch) <- substring((levels(df$gitBranch)), 8)
rename(nodes=config.nodes,
gitBranch=config.gitBranch,
granul=config.granul,
iterations=config.iterations) %>%
# Compute new columns
D=group_by(df, unit) %>%
mutate(tnorm = time / median(time) - 1) %>%
mutate(bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0)) %>%
mutate(variability = ifelse(bad > 0, "large", "ok")) %>%
mutate(mtime = median(time)) %>%
mutate(nmtime = mtime*nnodes) %>%
mutate(ntime = time*nnodes) %>%
ungroup() %>%
mutate(min_nmtime = min(nmtime)) %>%
mutate(rnmtime = nmtime / min_nmtime) %>%
mutate(rntime = ntime / min_nmtime) %>%
mutate(rmeff = 1.0 / rnmtime) %>%
mutate(reff = 1.0 / rntime) %>%
group_by(gitBranch) %>%
mutate(tmax = max(mtime)) %>%
mutate(speedup=tmax/time) %>%
mutate(eff=speedup/nnodes) %>%
mutate(mspeedup=tmax/mtime) %>%
mutate(meff=mspeedup/nnodes) %>%
# Remove the "garlic/" prefix from the gitBranch
mutate(branch = str_replace(gitBranch, "garlic/", "")) %>%
# Computations before converting to factor
mutate(time.nodes = time * nodes) %>%
mutate(time.nodes.iter = time.nodes / iterations) %>%
# Convert to factors
mutate(unit = as.factor(unit)) %>%
mutate(nodes = as.factor(nodes)) %>%
mutate(gitBranch = as.factor(gitBranch)) %>%
mutate(granul = as.factor(granul)) %>%
mutate(iterations = as.factor(iterations)) %>%
mutate(unit = as.factor(unit)) %>%
# Compute median times
group_by(unit) %>%
mutate(median.time = median(time)) %>%
mutate(median.time.nodes = median(time.nodes)) %>%
mutate(normalized.time = time / median.time - 1) %>%
mutate(log.median.time = log(median.time)) %>%
mutate(median.time.nodes.iter = median(time.nodes.iter)) %>%
ungroup()
D$bad = as.factor(D$bad > 0)
D$variability = as.factor(D$variability)
dpi = 300
h = 5
w = 8
ppi=300
h=5
w=5
# ---------------------------------------------------------------------
png("variability.png", width=1.5*w*ppi, height=h*ppi, res=ppi)
p = ggplot(data=D, aes(x=nodes, y=tnorm, color=variability)) +
p = ggplot(df, aes(x=nodes, y=normalized.time, fill=granul, color=iterations)) +
geom_boxplot() +
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
# Add the maximum allowed error lines
geom_hline(yintercept=c(-0.01, 0.01),
linetype="dashed", color="gray") +
# Draw boxplots
geom_boxplot(aes(fill=gitBranch)) +
scale_color_manual(values=c("brown", "black")) +
# Labels
labs(x="Nodes", y="Normalized time", title="Creams strong scaling",
subtitle=input_file)
print(p)
dev.off()
facet_wrap(branch ~ .) +
labs(x="nodes", y="Normalized time",
title="Creams strong scaling: normalized time",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
png("time.png", width=w*1.5*ppi, height=h*ppi, res=ppi)
p = ggplot(D, aes(x=nodes, y=mtime, color=gitBranch)) +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
geom_line(aes(group=gitBranch)) +
#geom_point() +
geom_point(aes(shape=variability), size=3) +
scale_shape_manual(values=c(21, 19)) +
# position=position_dodge(width=0.3)) +
#scale_x_continuous(trans=log2_trans()) +
scale_y_continuous(trans=log2_trans()) +
labs(x="Nodes", y="Time (s)",
title="Creams strong scaling (lower is better)",
subtitle=input_file)
print(p)
dev.off()
ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
png("refficiency.png", width=w*1.5*ppi, height=h*ppi, res=ppi)
p = ggplot(D, aes(x=nodes, y=rmeff, color=gitBranch)) +
# ---------------------------------------------------------------------
p = ggplot(df, aes(x=nodes, y=time, color=gitBranch)) +
geom_point(shape=21, size=3) +
geom_line(aes(y=median.time, group=gitBranch)) +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
geom_line(aes(group=gitBranch)) +
geom_point(aes(shape=variability), size=3) +
#geom_boxplot(aes(y=reff),
# position=position_dodge(width=0.0)) +
scale_shape_manual(values=c(21, 19)) +
#geom_point(aes(y=rntime),
# position=position_dodge(width=0.3)) +
#scale_x_continuous(trans=log2_trans()) +
#scale_y_continuous(trans=log2_trans()) +
labs(x="Nodes", y="Relative efficiency (to best)",
title="Creams strong scaling (higher is better)",
subtitle=input_file)
print(p)
dev.off()
# facet_wrap(branch ~ .) +
labs(x="nodes", y="Time (s)", title="Creams strong scaling: time",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
p = ggplot(df, aes(x=nodes, y=median.time.nodes, color=branch)) +
geom_point(shape=21, size=3) +
geom_line(aes(group=branch)) +
theme_bw() +
#facet_wrap(branch ~ .) +
labs(x="nodes", y="Median time * nodes (s)", title="Creams strong scaling: median time * nodes",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
ggsave("median.time.nodes.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("median.time.nodes.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
p = ggplot(df, aes(x=nodes, y=time.nodes, color=branch)) +
geom_boxplot() +
theme_bw() +
facet_wrap(branch ~ .) +
labs(x="nodes", y="Time * nodes (s)", title="Creams strong scaling: time * nodes",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
ggsave("time.nodes.boxplot.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.nodes.boxplot.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
#p = ggplot(df, aes(x=nodes, y=time.nodes.iter, color=branch)) +
# geom_point(shape=21, size=3) +
# geom_line(aes(y=median.time.nodes.iter, group=interaction(granul,iterations))) +
# theme_bw() +
# #facet_wrap(branch ~ .) +
# labs(x="nodes", y="Time * nodes / iterations (s)",
# title="Creams strong scaling: time * nodes / iterations",
# subtitle=input_file) +
# theme(plot.subtitle=element_text(size=8))
#
#ggsave("time.nodes.iter.png", plot=p, width=w, height=h, dpi=dpi)
#ggsave("time.nodes.iter.pdf", plot=p, width=w, height=h, dpi=dpi)

View File

@ -52,12 +52,12 @@ in
};
creams = with exp.creams; {
ss = stdPlot ./creams/ss.R [ ss.hybrid ss.pure ];
gran1 = stdPlot ./creams/gran.R [ gran.node1 ];
gran2 = stdPlot ./creams/gran.R [ gran.node2 ];
gran4 = stdPlot ./creams/gran.R [ gran.node4 ];
gran8 = stdPlot ./creams/gran.R [ gran.node8 ];
gran16 = stdPlot ./creams/gran.R [ gran.node16 ];
ss = stdPlot ./creams/ss.R [ ss ];
granularity = stdPlot ./creams/granularity.R [ granularity ];
# Extended version (we could use another R script for those plots)
big.ss = stdPlot ./creams/ss.R [ big.ss ];
big.granularity = stdPlot ./creams/granularity.R [ big.granularity ];
};
osu = with exp.osu; {