From 76deac0a63897786bc11cab5f156d22b04db082a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 30 Mar 2021 15:59:52 +0200 Subject: [PATCH] creams: update figures using one single pipeline --- garlic/fig/creams/gran.R | 71 ------------ garlic/fig/creams/granularity.R | 97 ++++++++++++++++ garlic/fig/creams/ss.R | 198 ++++++++++++++++++-------------- garlic/fig/index.nix | 12 +- 4 files changed, 212 insertions(+), 166 deletions(-) delete mode 100644 garlic/fig/creams/gran.R create mode 100644 garlic/fig/creams/granularity.R diff --git a/garlic/fig/creams/gran.R b/garlic/fig/creams/gran.R deleted file mode 100644 index d0d3149..0000000 --- a/garlic/fig/creams/gran.R +++ /dev/null @@ -1,71 +0,0 @@ -library(ggplot2) -library(dplyr) -library(scales) -library(jsonlite) - -args=commandArgs(trailingOnly=TRUE) - -# Read the timetable from args[1] -input_file = "input.json" -if (length(args)>0) { input_file = args[1] } - -# Load the dataset in NDJSON format -dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>% - jsonlite::flatten() - -# We only need some colums -df = select(dataset, unit, config.nodes, config.gitBranch, - config.granul, time, total_time) %>% - rename(nodes=config.nodes, gitBranch=config.gitBranch, - granul=config.granul) - -df$unit = as.factor(df$unit) -df$nnodes = df$nodes -df$nodes = as.factor(df$nodes) -df$gitBranch = as.factor(df$gitBranch) -df$granul = as.factor(df$granul) - -# Remove the "garlic/" prefix from the gitBranch -levels(df$gitBranch) <- substring((levels(df$gitBranch)), 8) - -# Compute new columns -D=group_by(df, unit) %>% - mutate(tnorm = time / median(time) - 1) %>% - mutate(bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0)) %>% - mutate(variability = ifelse(bad > 0, "large", "ok")) %>% - mutate(mtime = median(time)) %>% - mutate(nmtime = mtime*nnodes) %>% - mutate(ntime = time*nnodes) %>% - ungroup() %>% - mutate(min_nmtime = min(nmtime)) %>% - mutate(rnmtime = nmtime / min_nmtime) %>% - mutate(rntime = ntime / 
min_nmtime) %>% - mutate(rmeff = 1.0 / rnmtime) %>% - mutate(reff = 1.0 / rntime) %>% - group_by(gitBranch) %>% - mutate(tmax = max(mtime)) %>% - mutate(speedup=tmax/time) %>% - mutate(eff=speedup/nnodes) %>% - mutate(mspeedup=tmax/mtime) %>% - mutate(meff=mspeedup/nnodes) %>% - ungroup() - -D$bad = as.factor(D$bad > 0) -D$variability = as.factor(D$variability) - -ppi=300 -h=5 -w=5 - -png("time.png", width=w*1.5*ppi, height=h*ppi, res=ppi) -p = ggplot(D, aes(x=granul, y=mtime, linetype=gitBranch, shape=nodes)) + - geom_line(aes(group=interaction(nodes, gitBranch))) + - geom_point(aes(y=time)) + - scale_y_continuous(trans=log2_trans()) + - labs(x="Granularity", y="Time (s)", - title="Creams granularity", - subtitle=input_file) + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) -print(p) -dev.off() diff --git a/garlic/fig/creams/granularity.R b/garlic/fig/creams/granularity.R new file mode 100644 index 0000000..d77740a --- /dev/null +++ b/garlic/fig/creams/granularity.R @@ -0,0 +1,97 @@ +library(ggplot2) +library(dplyr, warn.conflicts = FALSE) +library(scales) +library(jsonlite) +library(viridis, warn.conflicts = FALSE) +library(stringr) + +args = commandArgs(trailingOnly=TRUE) + +# Set the input dataset if given in argv[1], or use "input" as default +if (length(args)>0) { input_file = args[1] } else { input_file = "input" } + +df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>% + + jsonlite::flatten() %>% + + select(unit, + config.nodes, + config.gitBranch, + config.granul, + config.iterations, + time, + total_time) %>% + + rename(nodes=config.nodes, + gitBranch=config.gitBranch, + granul=config.granul, + iterations=config.iterations) %>% + + # Remove the "garlic/" prefix from the gitBranch + mutate(branch = str_replace(gitBranch, "garlic/", "")) %>% + + # Computations before converting to factor + mutate(time.iter = time / iterations) %>% + + # Convert to factors + mutate(unit = as.factor(unit)) %>% + mutate(nodesFactor = as.factor(nodes)) %>% + 
mutate(gitBranch = as.factor(gitBranch)) %>% + mutate(granul = as.factor(granul)) %>% + mutate(iterations = as.factor(iterations)) %>% + mutate(unit = as.factor(unit)) %>% + + # Compute median times + group_by(unit) %>% + mutate(median.time = median(time)) %>% + mutate(normalized.time = time / median.time - 1) %>% + mutate(log.median.time = log(median.time)) %>% + mutate(median.time.iter = median(time.iter)) %>% + ungroup() + +dpi = 300 +h = 6 +w = 6 + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=granul, y=normalized.time)) + + geom_boxplot() + + geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") + + theme_bw() + + facet_wrap(branch ~ .) + + labs(x="granul", y="Normalized time", + title="Creams granularity: normalized time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=granul, y=time)) + + geom_point(shape=21, size=3) + + geom_line(aes(y=median.time, group=iterations)) + + theme_bw() + + facet_wrap(branch ~ .) + + labs(x="granul", y="Time (s)", title="Creams granularity: time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=granul, y=time.iter, color=iterations)) + + geom_point(shape=21, size=3) + + geom_line(aes(y=median.time.iter, group=iterations)) + + theme_bw() + + facet_wrap(branch ~ .) 
+ + labs(x="granul", y="Time (s)", title="Creams granularity: time / iterations", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("time.iter.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("time.iter.pdf", plot=p, width=w, height=h, dpi=dpi) diff --git a/garlic/fig/creams/ss.R b/garlic/fig/creams/ss.R index 5f274a8..1f09842 100644 --- a/garlic/fig/creams/ss.R +++ b/garlic/fig/creams/ss.R @@ -1,107 +1,127 @@ library(ggplot2) -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(scales) library(jsonlite) +library(viridis, warn.conflicts = FALSE) +library(stringr) -args=commandArgs(trailingOnly=TRUE) +args = commandArgs(trailingOnly=TRUE) -# Read the timetable from args[1] -input_file = "input.json" -if (length(args)>0) { input_file = args[1] } +# Set the input dataset if given in argv[1], or use "input" as default +if (length(args)>0) { input_file = args[1] } else { input_file = "input" } -# Load the dataset in NDJSON format -dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>% - jsonlite::flatten() +df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>% -# We only need some colums -df = select(dataset, unit, config.nodes, config.gitBranch, time) %>% - rename(nodes=config.nodes, gitBranch=config.gitBranch) + jsonlite::flatten() %>% -df$unit = as.factor(df$unit) -df$nnodes = df$nodes -df$nodes = as.factor(df$nodes) -df$gitBranch = as.factor(df$gitBranch) + select(unit, + config.nodes, + config.gitBranch, + config.granul, + config.iterations, + time, + total_time) %>% -# Remove the "garlic/" prefix from the gitBranch -levels(df$gitBranch) <- substring((levels(df$gitBranch)), 8) + rename(nodes=config.nodes, + gitBranch=config.gitBranch, + granul=config.granul, + iterations=config.iterations) %>% -# Compute new columns -D=group_by(df, unit) %>% - mutate(tnorm = time / median(time) - 1) %>% - mutate(bad = ifelse(max(abs(tnorm)) >= 0.01, 1, 0)) %>% - mutate(variability = ifelse(bad > 0, "large", "ok")) %>% - 
mutate(mtime = median(time)) %>% - mutate(nmtime = mtime*nnodes) %>% - mutate(ntime = time*nnodes) %>% - ungroup() %>% - mutate(min_nmtime = min(nmtime)) %>% - mutate(rnmtime = nmtime / min_nmtime) %>% - mutate(rntime = ntime / min_nmtime) %>% - mutate(rmeff = 1.0 / rnmtime) %>% - mutate(reff = 1.0 / rntime) %>% - group_by(gitBranch) %>% - mutate(tmax = max(mtime)) %>% - mutate(speedup=tmax/time) %>% - mutate(eff=speedup/nnodes) %>% - mutate(mspeedup=tmax/mtime) %>% - mutate(meff=mspeedup/nnodes) %>% + # Remove the "garlic/" prefix from the gitBranch + mutate(branch = str_replace(gitBranch, "garlic/", "")) %>% + + # Computations before converting to factor + mutate(time.nodes = time * nodes) %>% + mutate(time.nodes.iter = time.nodes / iterations) %>% + + # Convert to factors + mutate(unit = as.factor(unit)) %>% + mutate(nodes = as.factor(nodes)) %>% + mutate(gitBranch = as.factor(gitBranch)) %>% + mutate(granul = as.factor(granul)) %>% + mutate(iterations = as.factor(iterations)) %>% + mutate(unit = as.factor(unit)) %>% + + # Compute median times + group_by(unit) %>% + mutate(median.time = median(time)) %>% + mutate(median.time.nodes = median(time.nodes)) %>% + mutate(normalized.time = time / median.time - 1) %>% + mutate(log.median.time = log(median.time)) %>% + mutate(median.time.nodes.iter = median(time.nodes.iter)) %>% ungroup() -D$bad = as.factor(D$bad > 0) -D$variability = as.factor(D$variability) +dpi = 300 +h = 5 +w = 8 -ppi=300 -h=5 -w=5 +# --------------------------------------------------------------------- -png("variability.png", width=1.5*w*ppi, height=h*ppi, res=ppi) -p = ggplot(data=D, aes(x=nodes, y=tnorm, color=variability)) + +p = ggplot(df, aes(x=nodes, y=normalized.time, fill=granul, color=iterations)) + + geom_boxplot() + + geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") + theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - # Add the maximum allowed error lines - geom_hline(yintercept=c(-0.01, 0.01), - 
linetype="dashed", color="gray") + - # Draw boxplots - geom_boxplot(aes(fill=gitBranch)) + - scale_color_manual(values=c("brown", "black")) + - # Labels - labs(x="Nodes", y="Normalized time", title="Creams strong scaling", - subtitle=input_file) -print(p) -dev.off() + facet_wrap(branch ~ .) + + labs(x="nodes", y="Normalized time", + title="Creams strong scaling: normalized time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) -png("time.png", width=w*1.5*ppi, height=h*ppi, res=ppi) -p = ggplot(D, aes(x=nodes, y=mtime, color=gitBranch)) + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - geom_line(aes(group=gitBranch)) + - #geom_point() + - geom_point(aes(shape=variability), size=3) + - scale_shape_manual(values=c(21, 19)) + - # position=position_dodge(width=0.3)) + - #scale_x_continuous(trans=log2_trans()) + - scale_y_continuous(trans=log2_trans()) + - labs(x="Nodes", y="Time (s)", - title="Creams strong scaling (lower is better)", - subtitle=input_file) -print(p) -dev.off() +ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi) -png("refficiency.png", width=w*1.5*ppi, height=h*ppi, res=ppi) -p = ggplot(D, aes(x=nodes, y=rmeff, color=gitBranch)) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=nodes, y=time, color=gitBranch)) + + geom_point(shape=21, size=3) + + geom_line(aes(y=median.time, group=gitBranch)) + theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - geom_line(aes(group=gitBranch)) + - geom_point(aes(shape=variability), size=3) + - #geom_boxplot(aes(y=reff), - # position=position_dodge(width=0.0)) + - scale_shape_manual(values=c(21, 19)) + - #geom_point(aes(y=rntime), - # position=position_dodge(width=0.3)) + - #scale_x_continuous(trans=log2_trans()) + - #scale_y_continuous(trans=log2_trans()) + - labs(x="Nodes", y="Relative efficiency (to best)", - title="Creams strong scaling 
(higher is better)", - subtitle=input_file) -print(p) -dev.off() +# facet_wrap(branch ~ .) + + labs(x="nodes", y="Time (s)", title="Creams strong scaling: time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=nodes, y=median.time.nodes, color=branch)) + + geom_point(shape=21, size=3) + + geom_line(aes(group=branch)) + + theme_bw() + + #facet_wrap(branch ~ .) + + labs(x="nodes", y="Median time * nodes (s)", title="Creams strong scaling: median time * nodes", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("median.time.nodes.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("median.time.nodes.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=nodes, y=time.nodes, color=branch)) + + geom_boxplot() + + theme_bw() + + facet_wrap(branch ~ .) + + labs(x="nodes", y="Time * nodes (s)", title="Creams strong scaling: time * nodes", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("time.nodes.boxplot.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("time.nodes.boxplot.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +#p = ggplot(df, aes(x=nodes, y=time.nodes.iter, color=branch)) + +# geom_point(shape=21, size=3) + +# geom_line(aes(y=median.time.nodes.iter, group=interaction(granul,iterations))) + +# theme_bw() + +# #facet_wrap(branch ~ .) 
+ +# labs(x="nodes", y="Time * nodes / iterations (s)", +# title="Creams strong scaling: time * nodes / iterations", +# subtitle=input_file) + +# theme(plot.subtitle=element_text(size=8)) +# +#ggsave("time.nodes.iter.png", plot=p, width=w, height=h, dpi=dpi) +#ggsave("time.nodes.iter.pdf", plot=p, width=w, height=h, dpi=dpi) diff --git a/garlic/fig/index.nix b/garlic/fig/index.nix index 137c629..201d68f 100644 --- a/garlic/fig/index.nix +++ b/garlic/fig/index.nix @@ -52,12 +52,12 @@ in }; creams = with exp.creams; { - ss = stdPlot ./creams/ss.R [ ss.hybrid ss.pure ]; - gran1 = stdPlot ./creams/gran.R [ gran.node1 ]; - gran2 = stdPlot ./creams/gran.R [ gran.node2 ]; - gran4 = stdPlot ./creams/gran.R [ gran.node4 ]; - gran8 = stdPlot ./creams/gran.R [ gran.node8 ]; - gran16 = stdPlot ./creams/gran.R [ gran.node16 ]; + ss = stdPlot ./creams/ss.R [ ss ]; + granularity = stdPlot ./creams/granularity.R [ granularity ]; + + # Extended version (we could use another R script for those plots) + big.ss = stdPlot ./creams/ss.R [ big.ss ]; + big.granularity = stdPlot ./creams/granularity.R [ big.granularity ]; }; osu = with exp.osu; {