osu: adjust figures for publication

This commit is contained in:
Rodrigo Arias 2021-04-09 16:02:28 +02:00
parent 821b4f0d15
commit 1cb63b464d
3 changed files with 120 additions and 72 deletions

View File

@ -2,6 +2,9 @@ library(ggplot2)
library(dplyr, warn.conflicts = FALSE) library(dplyr, warn.conflicts = FALSE)
library(scales) library(scales)
library(jsonlite) library(jsonlite)
library(stringr)
#library(extrafont)
#library(Cairo)
args=commandArgs(trailingOnly=TRUE) args=commandArgs(trailingOnly=TRUE)
@ -15,7 +18,9 @@ dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# Keep only the unit name, node/task/CPU counts, size and bw # Keep only the unit name, node/task/CPU counts, size and bw
df = select(dataset, config.unitName, config.nodes, config.ntasksPerNode, config.cpusPerTask, size, bw) %>% df = select(dataset, config.unitName, config.nodes, config.ntasksPerNode, config.cpusPerTask, size, bw) %>%
rename(unitName=config.unitName) rename(unitName=config.unitName) %>%
mutate(bw=bw / 1024.0) %>%
mutate(unitName=str_replace(unitName, "osu-bw-", ""))
nodes = unique(df$config.nodes) nodes = unique(df$config.nodes)
tasksPerNode = unique(df$config.ntasksPerNode) tasksPerNode = unique(df$config.ntasksPerNode)
@ -24,42 +29,69 @@ df$unitName = as.factor(df$unitName)
df$sizeFactor = as.factor(df$size) df$sizeFactor = as.factor(df$size)
df = group_by(df, unitName, sizeFactor) %>% df = group_by(df, unitName, sizeFactor) %>%
mutate(medianBw = median(bw)) %>% mutate(median.bw = median(bw)) %>%
ungroup() ungroup()
# Tick positions for a log10 axis: one major tick per decade from 1e-10 to
# 1e10, and minor ticks at 1..9 times each of those decades.
breaks <- 10^seq(-10, 10)
minor_breaks <- rep(1:9, times = 21) * 10^rep(seq(-10, 10), each = 9)

# Box plot of bw against message size on a log2 x axis: one box per
# (unitName, sizeFactor) combination, coloured by unitName. The title reports
# the node/task/CPU counts and the subtitle is the input file path.
p <- ggplot(df, aes(size, bw)) +
  geom_boxplot(
    aes(group = interaction(unitName, sizeFactor), color = unitName)
  ) +
  scale_x_continuous(trans = log2_trans()) +
  # The log10 y scale built from breaks/minor_breaks stays disabled here.
  labs(
    title = sprintf(
      "OSU bandwidth benchmark: nodes=%d tasksPerNode=%d cpusPerTask=%d",
      nodes, tasksPerNode, cpusPerTask
    ),
    subtitle = input_file,
    x = "Size (bytes)",
    y = "Bandwidth (MB/s)"
  ) +
  theme_bw() +
  theme(legend.position = c(0.8, 0.2))
ppi=300 ppi=300
h=4 h=3
w=8 w=6
ggsave("boxplot.pdf", plot=p, width=w, height=h, dpi=ppi)
ggsave("boxplot.png", plot=p, width=w, height=h, dpi=ppi)
p = ggplot(data=df, aes(x=size, y=medianBw)) + p = ggplot(data=df, aes(x=size, y=median.bw)) +
labs(x="Size (bytes)", y="Bandwidth (MB/s)", labs(x="Message size", y="Bandwidth (GB/s)",
title=sprintf("OSU benchmark: osu_bw", #title=sprintf("OSU benchmark: osu_bw", nodes, tasksPerNode, cpusPerTask),
nodes, tasksPerNode, cpusPerTask), subtitle=gsub("-", "\uad", input_file)) +
subtitle=input_file) + geom_line(aes(linetype=unitName)) +
geom_line(aes(color=unitName, linetype=unitName)) + geom_point(aes(shape=unitName), size=1.5) +
geom_point(aes(color=unitName, shape=unitName)) + scale_shape_discrete(name = "MPI version") +
geom_hline(yintercept = 100e3 / 8, color="red") + scale_linetype_discrete(name = "MPI version") +
annotate("text", x = 8, y = (100e3 / 8) * 0.95, label = "12.5GB/s (100Gb/s)") + #scale_color_discrete(name = "MPI version") +
scale_x_continuous(trans=log2_trans()) + geom_hline(yintercept=12.5, color="red") +
annotate("text", x=1, y=12.5 * .95,
label="Max: 12.5GB/s (100Gbps)",
hjust=0, vjust=1, size=3) +
#scale_x_continuous(trans=log2_trans()) +
scale_x_continuous(trans=log2_trans(),
labels=label_bytes("auto_binary"),
n.breaks = 12,
#breaks=unique(df$size),
#minor_breaks=NULL
) +
#scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) + #scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) +
theme_bw() + theme_bw() +
theme(legend.position = c(0.8, 0.2)) theme(plot.subtitle = element_text(size=8, family="mono")) +
theme(legend.justification = c(1,0), legend.position = c(0.99, 0.01)) +
theme(axis.text.x = element_text(angle=-45, hjust=0))
ggsave("median-lines.png", plot=p, width=w, height=h, dpi=ppi) ggsave("median-lines.png", plot=p, width=w, height=h, dpi=ppi)
ggsave("median-lines.pdf", plot=p, width=w, height=h, dpi=ppi) ggsave("median-lines.pdf", plot=p, width=w, height=h, dpi=ppi)
#ggsave("median-lines-cairo.pdf", plot=p, width=w, height=h, dpi=ppi, device=cairo_pdf)
#CairoPDF(file="median-lines-Cairo.pdf", width=w, height=h)
#print(p)
#dev.off()
# Per-sample bandwidth scatter (hollow points) with the per-unit median drawn
# as a line, on a log2 x axis with one labelled tick per measured size.
#
# bw and median.bw were already rescaled by the mutate(bw = bw / 1024.0) step
# earlier in this script, so everything on the y axis has to be expressed in
# GB/s, as the median-lines plot does. Previously this plot still labelled the
# axis "MB/s" and drew the link limit at 100e3 / 8 = 12500, i.e. in the old
# unit, which put the reference line and its label far away from the data.
# NOTE(review): the rescale divides by 1024 while the 12.5 GB/s limit is
# decimal (100 Gb/s / 8) -- confirm which convention is intended.
p <- ggplot(data = df, aes(x = size, y = bw)) +
  labs(x = "Message size", y = "Bandwidth (GB/s)",
    #title=sprintf("OSU benchmark: osu_bw", nodes, tasksPerNode, cpusPerTask),
    subtitle = input_file) +
  # The line follows the grouped median; the points are the raw samples.
  geom_line(aes(y = median.bw, linetype = unitName, group = unitName)) +
  geom_point(aes(shape = unitName), size = 2) +
  scale_shape(solid = FALSE) +
  # Link limit: 100 Gb/s / 8 = 12.5 GB/s, in the same unit as bw.
  geom_hline(yintercept = 100 / 8, color = "red") +
  annotate("text", x = 8, y = (100 / 8) * 0.95,
    label = "Max: 12.5GB/s (100Gbps)") +
  #scale_x_continuous(trans=log2_trans()) +
  scale_x_continuous(trans = log2_trans(),
    labels = label_bytes("auto_binary"),
    breaks = unique(df$size),
    minor_breaks = NULL) +
  #scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 4)) +
  theme(legend.position = c(0.2, 0.6)) +
  theme(axis.text.x = element_text(angle = -45, hjust = 0))

ggsave("bw.png", plot = p, width = w, height = h, dpi = ppi)
ggsave("bw.pdf", plot = p, width = w, height = h, dpi = ppi)

# Print any warnings collected while building and saving the plots.
warnings()

View File

@ -2,6 +2,7 @@ library(ggplot2)
library(dplyr, warn.conflicts = FALSE) library(dplyr, warn.conflicts = FALSE)
library(scales) library(scales)
library(jsonlite) library(jsonlite)
library(stringr)
args=commandArgs(trailingOnly=TRUE) args=commandArgs(trailingOnly=TRUE)
@ -15,7 +16,8 @@ dataset = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# Keep only the unit name, node/task/CPU counts, size and latency # Keep only the unit name, node/task/CPU counts, size and latency
df = select(dataset, config.unitName, config.nodes, config.ntasksPerNode, config.cpusPerTask, size, latency) %>% df = select(dataset, config.unitName, config.nodes, config.ntasksPerNode, config.cpusPerTask, size, latency) %>%
rename(unitName=config.unitName) rename(unitName=config.unitName) %>%
mutate(unitName=str_replace(unitName, "osu-latency-", ""))
nodes = unique(df$config.nodes) nodes = unique(df$config.nodes)
tasksPerNode = unique(df$config.ntasksPerNode) tasksPerNode = unique(df$config.ntasksPerNode)
@ -30,34 +32,45 @@ df = group_by(df, unitName, sizeFactor) %>%
breaks = 10^(-10:10) breaks = 10^(-10:10)
minor_breaks <- rep(1:9, 21)*(10^rep(-10:10, each=9)) minor_breaks <- rep(1:9, 21)*(10^rep(-10:10, each=9))
# Box plot of latency against message size: one box per
# (unitName, sizeFactor) combination, coloured by unitName, with a log2 x axis
# and a log10 y axis ticked from the breaks/minor_breaks vectors. The title
# reports the node/task/CPU counts and the subtitle is the input file path.
p <- ggplot(df, aes(size, latency)) +
  geom_boxplot(
    aes(group = interaction(unitName, sizeFactor), color = unitName)
  ) +
  scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) +
  scale_x_continuous(trans = log2_trans()) +
  labs(
    title = sprintf(
      "OSU latency benchmark nodes=%d tasksPerNode=%d cpusPerTask=%d",
      nodes, tasksPerNode, cpusPerTask
    ),
    subtitle = input_file,
    x = "Size (bytes)",
    y = "Latency (us)"
  ) +
  theme_bw() +
  theme(legend.position = c(0.8, 0.2))
ppi=300 ppi=300
h=4 h=3
w=8 w=6
ggsave("boxplot.png", plot=p, width=w, height=h, dpi=ppi)
ggsave("boxplot.pdf", plot=p, width=w, height=h, dpi=ppi)
p = ggplot(data=df, aes(x=size, y=medianLatency)) + p = ggplot(data=df, aes(x=size, y=medianLatency)) +
labs(x="Size (bytes)", y="Latency (us)", labs(x="Message size", y="Median latency (µs)",
title=sprintf("OSU benchmark: osu_latency", #title=sprintf("OSU benchmark: osu_latency", nodes, tasksPerNode, cpusPerTask),
nodes, tasksPerNode, cpusPerTask), subtitle=gsub("-", "\uad", input_file)) +
subtitle=input_file) + geom_line(aes(linetype=unitName)) +
geom_line(aes(color=unitName, linetype=unitName)) + geom_point(aes(shape=unitName), size=2) +
geom_point(aes(color=unitName, shape=unitName)) +
scale_x_continuous(trans=log2_trans()) +
scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) + scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) +
scale_x_continuous(trans=log2_trans(),
labels=label_bytes("auto_binary"),
n.breaks = 12)+
scale_shape_discrete(name = "MPI version") +
scale_linetype_discrete(name = "MPI version") +
theme_bw() + theme_bw() +
theme(legend.position = c(0.2, 0.8)) theme(plot.subtitle = element_text(size=8, family="mono")) +
theme(legend.justification = c(0,1), legend.position = c(0.01, 0.99)) +
theme(axis.text.x = element_text(angle=-45, hjust=0))
ggsave("median-lines.png", plot=p, width=w, height=h, dpi=ppi) ggsave("median-lines.png", plot=p, width=w, height=h, dpi=ppi)
ggsave("median-lines.pdf", plot=p, width=w, height=h, dpi=ppi) ggsave("median-lines.pdf", plot=p, width=w, height=h, dpi=ppi)
# Per-sample latency scatter with the per-unit median (medianLatency) drawn as
# a line. The x axis is log2 with one byte-labelled tick per measured size and
# no minor grid; the y axis is log10 using the breaks/minor_breaks vectors.
p <- ggplot(df, aes(size, latency)) +
  geom_line(aes(y = medianLatency, group = unitName, linetype = unitName)) +
  geom_point(aes(shape = unitName), size = 2) +
  scale_x_continuous(
    trans = log2_trans(),
    breaks = unique(df$size),
    minor_breaks = NULL,
    labels = label_bytes("auto_binary")
  ) +
  scale_y_log10(breaks = breaks, minor_breaks = minor_breaks) +
  labs(subtitle = input_file, x = "Size (bytes)", y = "Latency (us)") +
  theme_bw() +
  # Theme tweaks go after theme_bw() so that they are not overwritten by it.
  theme(
    plot.subtitle = element_text(color = "gray50"),
    axis.text.x = element_text(angle = -45, hjust = 0),
    legend.position = c(0.2, 0.8)
  )

# Write the figure as PNG first, then PDF, with the shared size and resolution.
invisible(lapply(
  c("latency.png", "latency.pdf"),
  function(out_file) ggsave(out_file, plot = p, width = w, height = h, dpi = ppi)
))

View File

@ -24,8 +24,9 @@ df = select(dataset,
size, bw, config.iterations) %>% size, bw, config.iterations) %>%
rename(unitName=config.unitName, rename(unitName=config.unitName,
iterations=config.iterations, iterations=config.iterations,
PSM2_MQ_EAGER_SDMA_SZ=config.PSM2_MQ_EAGER_SDMA_SZ, PSM2_MQ_EAGER_SDMA_SZ.val=config.PSM2_MQ_EAGER_SDMA_SZ,
PSM2_MTU=config.PSM2_MTU) PSM2_MTU.val=config.PSM2_MTU) %>%
mutate(bw = bw / 1000.0)
nodes = unique(df$config.nodes) nodes = unique(df$config.nodes)
tasksPerNode = unique(df$config.ntasksPerNode) tasksPerNode = unique(df$config.ntasksPerNode)
@ -33,33 +34,35 @@ cpusPerTask = unique(df$config.cpusPerTask)
df$unitName = as.factor(df$unitName) df$unitName = as.factor(df$unitName)
df$sizeFactor = as.factor(df$size) df$sizeFactor = as.factor(df$size)
df$sizeKB = df$size / 1024 df$sizeKB = df$size / 1024
df$PSM2_MQ_EAGER_SDMA_SZ.f = as.factor(df$PSM2_MQ_EAGER_SDMA_SZ) df$PSM2_MQ_EAGER_SDMA_SZ = as.factor(df$PSM2_MQ_EAGER_SDMA_SZ.val)
df$PSM2_MTU.f = as.factor(df$PSM2_MTU) df$PSM2_MTU = as.factor(df$PSM2_MTU.val)
iterations = unique(df$iterations) iterations = unique(df$iterations)
df = group_by(df, unitName, sizeFactor) %>% df = group_by(df, unitName, sizeFactor) %>%
mutate(medianBw = median(bw)) %>% mutate(median.bw = median(bw)) %>%
ungroup() ungroup()
breaks = 10^(-10:10) breaks = 10^(-10:10)
minor_breaks <- rep(1:9, 21)*(10^rep(-10:10, each=9)) minor_breaks <- rep(1:9, 21)*(10^rep(-10:10, each=9))
ppi=150 ppi=300
h=6 h=3
w=8 w=6
p = ggplot(data=df, aes(x=sizeKB, y=bw)) + p = ggplot(data=df, aes(x=sizeKB, y=bw)) +
labs(x="Message size (KB)", y="Bandwidth (MB/s)", geom_vline(aes(xintercept = PSM2_MQ_EAGER_SDMA_SZ.val/1024), color="blue") +
title=sprintf("OSU benchmark: osu_bw --iterations %d", iterations), geom_vline(aes(xintercept = PSM2_MTU.val/1024), color="red") +
subtitle=input_file) + labs(x="Message size (KiB)", y="Bandwidth (GB/s)",
geom_point(shape=21, size=3) + #title=sprintf("OSU benchmark: osu_bw --iterations %d", iterations),
geom_vline(aes(xintercept = PSM2_MQ_EAGER_SDMA_SZ/1024), color="blue") + subtitle=gsub("-", "\uad", input_file)) +
geom_vline(aes(xintercept = PSM2_MTU / 1024), color="red") + geom_point(shape=21, size=2) +
#annotate("text", x = 10.2, y = 8.5e3, label = "MTU = 10KB", color="red", hjust=0) + #annotate("text", x = 10.2, y = 8.5e3, label = "MTU = 10KB", color="red", hjust=0) +
facet_wrap(vars(PSM2_MTU.f), nrow=3, labeller = "label_both") + facet_wrap(vars(PSM2_MTU), nrow=3, labeller = "label_both") +
scale_x_continuous(breaks = unique(df$sizeKB), minor_breaks=NULL) + #scale_x_continuous(breaks = unique(df$sizeKB), minor_breaks=NULL) +
theme_bw() scale_x_continuous(n.breaks = 12) +
theme_bw() +
theme(plot.subtitle = element_text(size=8, family="mono"))
ggsave("bw.png", plot=p, width=w, height=h, dpi=ppi) ggsave("bw.png", plot=p, width=w, height=h, dpi=ppi)
ggsave("bw.pdf", plot=p, width=w, height=h, dpi=ppi) ggsave("bw.pdf", plot=p, width=w, height=h, dpi=ppi)