creams: update figures using one single pipeline

This commit is contained in:
Rodrigo Arias 2021-03-30 15:59:52 +02:00
parent 87f751185c
commit 76deac0a63
4 changed files with 212 additions and 166 deletions

View File

@ -1,71 +0,0 @@
library(ggplot2)
library(dplyr)
library(scales)
library(jsonlite)
# Plot the execution time of the Creams granularity experiment.
#
# Usage: Rscript <this script> [input_file]
# Reads an NDJSON dataset and writes the figure "time.png" in the current
# working directory.

args = commandArgs(trailingOnly=TRUE)

# Read the dataset path from args[1], or use "input.json" as default
input_file = "input.json"
if (length(args) > 0) { input_file = args[1] }

# Load the dataset in NDJSON format; flatten() turns the nested data frames
# into plain columns with dotted names (config.nodes, config.granul, ...)
dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
  jsonlite::flatten()

# We only need some columns
df = select(dataset, unit, config.nodes, config.gitBranch,
    config.granul, time, total_time) %>%
  rename(nodes=config.nodes, gitBranch=config.gitBranch,
    granul=config.granul)

# Keep a numeric copy of the node count (nnodes) for the arithmetic below,
# before the nodes column is turned into a factor
df$nnodes = df$nodes

df$unit = as.factor(df$unit)
df$nodes = as.factor(df$nodes)
df$gitBranch = as.factor(df$gitBranch)
df$granul = as.factor(df$granul)

# Remove the "garlic/" prefix from the gitBranch. The pattern is anchored so
# that branch names without the prefix are left untouched instead of losing
# their first seven characters.
levels(df$gitBranch) = sub("^garlic/", "", levels(df$gitBranch))

# Compute new columns
D = group_by(df, unit) %>%
  # Per-unit statistics: deviation from the median and node-weighted times
  mutate(
    tnorm = time / median(time) - 1,
    # A unit is flagged when any of its runs deviates 1% or more
    bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0),
    variability = ifelse(bad > 0, "large", "ok"),
    mtime = median(time),
    nmtime = mtime * nnodes,
    ntime = time * nnodes) %>%
  ungroup() %>%
  # Efficiency relative to the best (minimum) node-weighted median time
  mutate(
    min_nmtime = min(nmtime),
    rnmtime = nmtime / min_nmtime,
    rntime = ntime / min_nmtime,
    rmeff = 1.0 / rnmtime,
    reff = 1.0 / rntime) %>%
  # Per-branch speedup relative to the slowest median time of the branch
  group_by(gitBranch) %>%
  mutate(
    tmax = max(mtime),
    speedup = tmax / time,
    eff = speedup / nnodes,
    mspeedup = tmax / mtime,
    meff = mspeedup / nnodes) %>%
  ungroup()

D$bad = as.factor(D$bad > 0)
D$variability = as.factor(D$variability)

# Output resolution (pixels per inch) and base figure size (inches)
ppi = 300
h = 5
w = 5

png("time.png", width=w*1.5*ppi, height=h*ppi, res=ppi)

# Lines follow the median time of each (nodes, branch) pair; the points show
# every individual measurement
p = ggplot(D, aes(x=granul, y=mtime, linetype=gitBranch, shape=nodes)) +
  geom_line(aes(group=interaction(nodes, gitBranch))) +
  geom_point(aes(y=time)) +
  scale_y_continuous(trans=log2_trans()) +
  labs(x="Granularity", y="Time (s)",
    title="Creams granularity",
    subtitle=input_file) +
  theme_bw() +
  theme(plot.subtitle=element_text(size=8))

# print() is required to render the plot into the png device from a script
print(p)
dev.off()

View File

@ -0,0 +1,97 @@
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
library(stringr)
# Plot the results of the Creams granularity experiment.
#
# Usage: Rscript <this script> [input_file]
# Reads an NDJSON dataset and writes three figures, each one as PNG and PDF,
# in the current working directory: normalized.time, time and time.iter.

args = commandArgs(trailingOnly=TRUE)

# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }

# Load the dataset; flatten() turns the nested data frames into plain columns
# with dotted names (config.nodes, config.granul, ...)
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
  jsonlite::flatten() %>%

  # Keep only the columns used below
  select(unit,
    config.nodes,
    config.gitBranch,
    config.granul,
    config.iterations,
    time,
    total_time) %>%

  rename(nodes=config.nodes,
    gitBranch=config.gitBranch,
    granul=config.granul,
    iterations=config.iterations) %>%

  # Remove the "garlic/" prefix from the gitBranch. The pattern is anchored
  # so that only a leading "garlic/" is stripped.
  mutate(branch = str_replace(gitBranch, "^garlic/", "")) %>%

  # Computations before converting to factor (iterations must be numeric)
  mutate(time.iter = time / iterations) %>%

  # Convert to factors; nodes itself is kept numeric and the factor version
  # is stored in nodesFactor
  mutate(unit = as.factor(unit)) %>%
  mutate(nodesFactor = as.factor(nodes)) %>%
  mutate(gitBranch = as.factor(gitBranch)) %>%
  mutate(granul = as.factor(granul)) %>%
  mutate(iterations = as.factor(iterations)) %>%

  # Compute median times per unit
  group_by(unit) %>%
  mutate(median.time = median(time)) %>%
  # Relative deviation of every run from the median of its unit
  mutate(normalized.time = time / median.time - 1) %>%
  mutate(log.median.time = log(median.time)) %>%
  mutate(median.time.iter = median(time.iter)) %>%
  ungroup()

# Output resolution and figure size (inches)
dpi = 300
h = 6
w = 6

# ---------------------------------------------------------------------

# Normalized time: the dashed red lines mark a deviation of +/- 1% from the
# median
p = ggplot(df, aes(x=granul, y=normalized.time)) +
  geom_boxplot() +
  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
  theme_bw() +
  facet_wrap(branch ~ .) +
  labs(x="granul", y="Normalized time",
    title="Creams granularity: normalized time",
    subtitle=input_file) +
  theme(plot.subtitle=element_text(size=8))

ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)

# ---------------------------------------------------------------------

# Time: one point per run, the line follows the median time of each unit
p = ggplot(df, aes(x=granul, y=time)) +
  geom_point(shape=21, size=3) +
  geom_line(aes(y=median.time, group=iterations)) +
  theme_bw() +
  facet_wrap(branch ~ .) +
  labs(x="granul", y="Time (s)", title="Creams granularity: time",
    subtitle=input_file) +
  theme(plot.subtitle=element_text(size=8))

ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)

# ---------------------------------------------------------------------

# Time per iteration, colored by the number of iterations
p = ggplot(df, aes(x=granul, y=time.iter, color=iterations)) +
  geom_point(shape=21, size=3) +
  geom_line(aes(y=median.time.iter, group=iterations)) +
  theme_bw() +
  facet_wrap(branch ~ .) +
  labs(x="granul", y="Time (s)", title="Creams granularity: time / iterations",
    subtitle=input_file) +
  theme(plot.subtitle=element_text(size=8))

ggsave("time.iter.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.iter.pdf", plot=p, width=w, height=h, dpi=dpi)

View File

@ -1,107 +1,127 @@
library(ggplot2) library(ggplot2)
library(dplyr) library(dplyr, warn.conflicts = FALSE)
library(scales) library(scales)
library(jsonlite) library(jsonlite)
library(viridis, warn.conflicts = FALSE)
library(stringr)
args = commandArgs(trailingOnly=TRUE) args = commandArgs(trailingOnly=TRUE)
# Read the timetable from args[1] # Set the input dataset if given in argv[1], or use "input" as default
input_file = "input.json" if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
if (length(args)>0) { input_file = args[1] }
# Load the dataset in NDJSON format df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
jsonlite::flatten()
# We only need some columns jsonlite::flatten() %>%
df = select(dataset, unit, config.nodes, config.gitBranch, time) %>%
rename(nodes=config.nodes, gitBranch=config.gitBranch)
df$unit = as.factor(df$unit) select(unit,
df$nnodes = df$nodes config.nodes,
df$nodes = as.factor(df$nodes) config.gitBranch,
df$gitBranch = as.factor(df$gitBranch) config.granul,
config.iterations,
time,
total_time) %>%
rename(nodes=config.nodes,
gitBranch=config.gitBranch,
granul=config.granul,
iterations=config.iterations) %>%
# Remove the "garlic/" prefix from the gitBranch # Remove the "garlic/" prefix from the gitBranch
levels(df$gitBranch) <- substring((levels(df$gitBranch)), 8) mutate(branch = str_replace(gitBranch, "garlic/", "")) %>%
# Compute new columns # Computations before converting to factor
D=group_by(df, unit) %>% mutate(time.nodes = time * nodes) %>%
mutate(tnorm = time / median(time) - 1) %>% mutate(time.nodes.iter = time.nodes / iterations) %>%
mutate(bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0)) %>%
mutate(variability = ifelse(bad > 0, "large", "ok")) %>% # Convert to factors
mutate(mtime = median(time)) %>% mutate(unit = as.factor(unit)) %>%
mutate(nmtime = mtime*nnodes) %>% mutate(nodes = as.factor(nodes)) %>%
mutate(ntime = time*nnodes) %>% mutate(gitBranch = as.factor(gitBranch)) %>%
ungroup() %>% mutate(granul = as.factor(granul)) %>%
mutate(min_nmtime = min(nmtime)) %>% mutate(iterations = as.factor(iterations)) %>%
mutate(rnmtime = nmtime / min_nmtime) %>% mutate(unit = as.factor(unit)) %>%
mutate(rntime = ntime / min_nmtime) %>%
mutate(rmeff = 1.0 / rnmtime) %>% # Compute median times
mutate(reff = 1.0 / rntime) %>% group_by(unit) %>%
group_by(gitBranch) %>% mutate(median.time = median(time)) %>%
mutate(tmax = max(mtime)) %>% mutate(median.time.nodes = median(time.nodes)) %>%
mutate(speedup=tmax/time) %>% mutate(normalized.time = time / median.time - 1) %>%
mutate(eff=speedup/nnodes) %>% mutate(log.median.time = log(median.time)) %>%
mutate(mspeedup=tmax/mtime) %>% mutate(median.time.nodes.iter = median(time.nodes.iter)) %>%
mutate(meff=mspeedup/nnodes) %>%
ungroup() ungroup()
D$bad = as.factor(D$bad > 0) dpi = 300
D$variability = as.factor(D$variability)
ppi=300
h = 5 h = 5
w=5 w = 8
png("variability.png", width=1.5*w*ppi, height=h*ppi, res=ppi) # ---------------------------------------------------------------------
p = ggplot(data=D, aes(x=nodes, y=tnorm, color=variability)) +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
# Add the maximum allowed error lines
geom_hline(yintercept=c(-0.01, 0.01),
linetype="dashed", color="gray") +
# Draw boxplots
geom_boxplot(aes(fill=gitBranch)) +
scale_color_manual(values=c("brown", "black")) +
# Labels
labs(x="Nodes", y="Normalized time", title="Creams strong scaling",
subtitle=input_file)
print(p)
dev.off()
png("time.png", width=w*1.5*ppi, height=h*ppi, res=ppi) p = ggplot(df, aes(x=nodes, y=normalized.time, fill=granul, color=iterations)) +
p = ggplot(D, aes(x=nodes, y=mtime, color=gitBranch)) + geom_boxplot() +
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
theme_bw() + theme_bw() +
theme(plot.subtitle=element_text(size=8)) + facet_wrap(branch ~ .) +
geom_line(aes(group=gitBranch)) + labs(x="nodes", y="Normalized time",
#geom_point() + title="Creams strong scaling: normalized time",
geom_point(aes(shape=variability), size=3) + subtitle=input_file) +
scale_shape_manual(values=c(21, 19)) + theme(plot.subtitle=element_text(size=8))
# position=position_dodge(width=0.3)) +
#scale_x_continuous(trans=log2_trans()) +
scale_y_continuous(trans=log2_trans()) +
labs(x="Nodes", y="Time (s)",
title="Creams strong scaling (lower is better)",
subtitle=input_file)
print(p)
dev.off()
png("refficiency.png", width=w*1.5*ppi, height=h*ppi, res=ppi) ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
p = ggplot(D, aes(x=nodes, y=rmeff, color=gitBranch)) + ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
p = ggplot(df, aes(x=nodes, y=time, color=gitBranch)) +
geom_point(shape=21, size=3) +
geom_line(aes(y=median.time, group=gitBranch)) +
theme_bw() + theme_bw() +
theme(plot.subtitle=element_text(size=8)) + # facet_wrap(branch ~ .) +
geom_line(aes(group=gitBranch)) + labs(x="nodes", y="Time (s)", title="Creams strong scaling: time",
geom_point(aes(shape=variability), size=3) + subtitle=input_file) +
#geom_boxplot(aes(y=reff), theme(plot.subtitle=element_text(size=8))
# position=position_dodge(width=0.0)) +
scale_shape_manual(values=c(21, 19)) + ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
#geom_point(aes(y=rntime), ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
# position=position_dodge(width=0.3)) +
#scale_x_continuous(trans=log2_trans()) + # ---------------------------------------------------------------------
#scale_y_continuous(trans=log2_trans()) +
labs(x="Nodes", y="Relative efficiency (to best)", p = ggplot(df, aes(x=nodes, y=median.time.nodes, color=branch)) +
title="Creams strong scaling (higher is better)", geom_point(shape=21, size=3) +
subtitle=input_file) geom_line(aes(group=branch)) +
print(p) theme_bw() +
dev.off() #facet_wrap(branch ~ .) +
labs(x="nodes", y="Median time * nodes (s)", title="Creams strong scaling: median time * nodes",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
ggsave("median.time.nodes.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("median.time.nodes.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
p = ggplot(df, aes(x=nodes, y=time.nodes, color=branch)) +
geom_boxplot() +
theme_bw() +
facet_wrap(branch ~ .) +
labs(x="nodes", y="Time * nodes (s)", title="Creams strong scaling: time * nodes",
subtitle=input_file) +
theme(plot.subtitle=element_text(size=8))
ggsave("time.nodes.boxplot.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.nodes.boxplot.pdf", plot=p, width=w, height=h, dpi=dpi)
# ---------------------------------------------------------------------
#p = ggplot(df, aes(x=nodes, y=time.nodes.iter, color=branch)) +
# geom_point(shape=21, size=3) +
# geom_line(aes(y=median.time.nodes.iter, group=interaction(granul,iterations))) +
# theme_bw() +
# #facet_wrap(branch ~ .) +
# labs(x="nodes", y="Time * nodes / iterations (s)",
# title="Creams strong scaling: time * nodes / iterations",
# subtitle=input_file) +
# theme(plot.subtitle=element_text(size=8))
#
#ggsave("time.nodes.iter.png", plot=p, width=w, height=h, dpi=dpi)
#ggsave("time.nodes.iter.pdf", plot=p, width=w, height=h, dpi=dpi)

View File

@ -52,12 +52,12 @@ in
}; };
creams = with exp.creams; { creams = with exp.creams; {
ss = stdPlot ./creams/ss.R [ ss.hybrid ss.pure ]; ss = stdPlot ./creams/ss.R [ ss ];
gran1 = stdPlot ./creams/gran.R [ gran.node1 ]; granularity = stdPlot ./creams/granularity.R [ granularity ];
gran2 = stdPlot ./creams/gran.R [ gran.node2 ];
gran4 = stdPlot ./creams/gran.R [ gran.node4 ]; # Extended version (we could use another R script for those plots)
gran8 = stdPlot ./creams/gran.R [ gran.node8 ]; big.ss = stdPlot ./creams/ss.R [ big.ss ];
gran16 = stdPlot ./creams/gran.R [ gran.node16 ]; big.granularity = stdPlot ./creams/granularity.R [ big.granularity ];
}; };
osu = with exp.osu; { osu = with exp.osu; {