forked from rarias/jungle
		
	
		
			
				
	
	
		
			110 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			110 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| # This R program takes as argument the dataset that contains the results of the
 | ||
| # execution of the heat example experiment and produces some plots. All the
 | ||
| # knowledge to understand how this script works is covered by this nice R book:
 | ||
| #
 | ||
| # Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
 | ||
| # O’Reilly Media (2020). 2nd edition
 | ||
| #
 | ||
| # Which can be freely read it online here: https://r-graphics.org/
 | ||
| #
 | ||
| # Please, search in this book before copying some random (and probably oudated)
 | ||
| # reply on stack overflow.
 | ||
| 
 | ||
| # We load some R packages to import the required functions. We mainly use the
 | ||
| # tidyverse packages, which are very good for ploting data,
 | ||
| library(ggplot2)
 | ||
| library(dplyr, warn.conflicts = FALSE)
 | ||
| library(scales)
 | ||
| library(jsonlite)
 | ||
| library(viridis, warn.conflicts = FALSE)
 | ||
| 
 | ||
| # Here we simply load the arguments to find the input dataset. If nothing is
 | ||
| # specified we use the file named `input` in the current directory.
 | ||
| # We can run this script directly using:
 | ||
| # Rscript <path-to-this-script> <input-dataset>
 | ||
| 
 | ||
| # Load the arguments (argv)
 | ||
| args = commandArgs(trailingOnly=TRUE)
 | ||
| 
 | ||
| # Set the input dataset if given in argv[1], or use "input" as default
 | ||
| if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
 | ||
| 
 | ||
| df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
 | ||
| 
 | ||
|   # Then we flatten it, as it may contain dictionaries inside the columns
 | ||
|   jsonlite::flatten() %>%
 | ||
| 
 | ||
|   # Now the dataframe contains all the configuration of the units inside the
 | ||
|   # columns named `config.*`, for example `config.cbs`. We first select only
 | ||
|   # the columns that we need:
 | ||
|   select(config.nblocks,
 | ||
|          config.ncommblocks,
 | ||
|          config.hw.cpusPerSocket,
 | ||
|          config.nodes,
 | ||
|          config.nprocs.x,
 | ||
|          config.nprocs.y,
 | ||
|          config.nprocs.z,
 | ||
|          unit,
 | ||
|          time
 | ||
|          ) %>%
 | ||
| 
 | ||
|   # And then we rename those columns to something shorter:
 | ||
|   rename(nblocks=config.nblocks,
 | ||
|          ncommblocks=config.ncommblocks,
 | ||
|          cpusPerSocket=config.hw.cpusPerSocket,
 | ||
|          nodes=config.nodes,
 | ||
|          npx=config.nprocs.x,
 | ||
|          npy=config.nprocs.y,
 | ||
|          npz=config.nprocs.z
 | ||
|          ) %>%
 | ||
| 
 | ||
|   mutate(axisColor=as.factor(ifelse(npx != 1, "X", ifelse(npy != 1, "Y", "Z")))) %>%
 | ||
| 
 | ||
|   mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
 | ||
| 
 | ||
|   mutate(nblocks = as.factor(nblocks)) %>%
 | ||
|   mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
 | ||
|   mutate(nodes = as.factor(nodes)) %>%
 | ||
|   mutate(unit = as.factor(unit)) %>%
 | ||
| 
 | ||
|   mutate(timePerNprocs = time * npz) %>%
 | ||
| 
 | ||
|   group_by(unit) %>%
 | ||
| 
 | ||
|   # And compute some metrics which are applied to each group. For example we
 | ||
|   # compute the median time within the runs of a unit:
 | ||
|   mutate(median.time = median(time)) %>%
 | ||
|   mutate(normalized.time = time / median.time - 1) %>%
 | ||
|   mutate(log.median.time = log(median.time)) %>%
 | ||
| 
 | ||
|   # Then, we remove the grouping. This step is very important, otherwise the
 | ||
|   # plotting functions get confused:
 | ||
|   ungroup()
 | ||
| 
 | ||
| dpi=300
 | ||
| h=5
 | ||
| w=5
 | ||
| 
 | ||
| # We plot the time of each run as we vary the block size
 | ||
| p = ggplot(df, aes(x=nodes, y=timePerNprocs, color=blocksPerCpu)) +
 | ||
| 
 | ||
|   # We add a points (scatter plot) using circles (shape=21) a bit larger
 | ||
|   # than the default (size=3)
 | ||
|   geom_point(shape=21, size=3) +
 | ||
| 
 | ||
|   # The bw theme is recommended for publications
 | ||
|   theme_bw() +
 | ||
| 
 | ||
|   # Here we add the title and the labels of the axes
 | ||
|   labs(x="Nodes", y="Time * Num Procs", title="HPCG strong scalability: Z axis",
 | ||
|     color="Blocks Per CPU",
 | ||
|     subtitle=input_file) +
 | ||
| 
 | ||
|   # And set the subtitle font size a bit smaller, so it fits nicely
 | ||
|   theme(plot.subtitle=element_text(size=8))
 | ||
| 
 | ||
| # Then, we save the plot both in png and pdf
 | ||
| ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
 | ||
| ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
 | ||
| 
 | 
