bigsort: add experiment with input generation
parent 0bb5c76aad
commit aca7e36fc7

72  garlic/exp/bigsort/genseq.nix  Normal file
@@ -0,0 +1,72 @@
{
  stdenv
, stdexp
, bsc
, targetMachine
, stages
, n # must be a string
, dram # must be a string
, strace
}:

with stdenv.lib;

# Ensure the arguments are strings, to avoid problems with large numbers
assert (isString n);
assert (isString dram);

let
  # Initial variable configuration
  varConf = with bsc; { };

  inherit (targetMachine) fs;

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "genseq";
    unitName = "${expName}.n${n}.dram${dram}";
    inherit (targetMachine.config) hw;
    inherit n dram;

    # Don't repeat
    loops = 1;

    # Resources
    qos = "debug";
    ntasksPerNode = 1;
    nodes = 1;
    time = "01:00:00";
    cpusPerTask = hw.cpusPerNode;
    jobName = unitName;
  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  exec = {nextStage, conf, ...}: with conf;
  let
    #FIXME: We need a better mechanism to get the output paths
    outDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN";
    outFile = "${outDir}/seq.dat";
  in
    stages.exec {
      inherit nextStage;
      pre = ''
        mkdir -p "${outDir}"
      '';
      argv = [ n dram outFile ];
      post = ''
        # Link the output here
        ln -s "${outFile}" seq.dat
      '';
    };

  program = {...}: bsc.apps.bigsort.genseq;

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in

stdexp.genExperiment { inherit configs pipeline; }
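Both sizes are expected to arrive already converted with toString, which is what the overlay change further down does. A minimal sketch of the intended call site, assuming the usual callPackage wiring and using only the argument names and comments that appear in this diff:

  genseq = callPackage ./garlic/exp/bigsort/genseq.nix {
    # Passed as strings so the values survive unchanged into the job scripts.
    n = toString (1024 * 1024 * 1024 / 8); # 1 GB input size
    dram = toString (1024 * 1024 * 1024); # 1 GB chunk
  };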
80  garlic/exp/bigsort/mpi+omp.nix  Deleted file
@@ -1,80 +0,0 @@
{
  stdenv
, stdexp
, bsc
, targetMachine
, stages
}:

with stdenv.lib;

let
  # Initial variable configuration
  varConf = with bsc; {
    n = [ 134217728 ];
    bs = [ 134217728 ];
  };

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "bigsort.mpi+omp";
    unitName = "${expName}.bs${toString bs}";
    inherit (targetMachine.config) hw;

    # hpcg options
    n = c.n;
    bs = c.bs;
    cc = bsc.icc;
    mpi = bsc.mpi; # TODO: Remove this for oss
    gitBranch = "garlic/mpi+send+omp+task";

    # Repeat the execution of each unit 30 times
    loops = 1;

    # Resources
    qos = "debug";
    ntasksPerNode = 1;
    nodes = 1;
    time = "01:00:00";
    # All CPUs of the socket to each task
    cpusPerTask = hw.cpusPerSocket;
    jobName = "bigsort-${toString n}-${toString bs}-${gitBranch}";
  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  # input = genInput configs;

  exec = {nextStage, conf, ...}: with conf; stages.exec {
    inherit nextStage;
    #env = "NANOS6_DEPENDENCIES=discrete";
    argv = [
      "${toString n}"
      "${toString bs}"
      "/gpfs/scratch/bsc15/bsc15065/BigSort/1g_unsorted.dat"
      "/gpfs/scratch/bsc15/bsc15065/BigSort/1g_sorted.dat"
      "/gpfs/scratch/bsc15/bsc15065/BigSort/tmp"
      #"${toString inputFile}"
      #"${toString outputFile}"
      #"$TMPDIR"
      "${toString (builtins.div bs 2)}"
    ];
  };

  program = {nextStage, conf, ...}: with conf;
    let
      customPkgs = stdexp.replaceMpi conf.mpi;
    in
      customPkgs.apps.bigsort.override {
        inherit cc gitBranch;
      };

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in

#{ inherit configs pipeline; }
stdexp.genExperiment { inherit configs pipeline; }
101  garlic/exp/bigsort/shuffle.nix  Normal file
@@ -0,0 +1,101 @@
{
  stdenv
, stdexp
, bsc
, targetMachine
, stages
, inputTre
, n
, dram
, garlicTools
, resultFromTrebuchet
}:

with stdenv.lib;
with garlicTools;

let
  # Initial variable configuration
  varConf = with bsc; { };

  inherit (targetMachine) fs;

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "shuffle";
    unitName = "${expName}.n${n}.dram${dram}";
    inherit (targetMachine.config) hw;
    inherit n dram;

    # Don't repeat
    loops = 1;

    # Resources
    qos = "debug";
    ntasksPerNode = 1;
    nodes = 1;
    time = "01:00:00";
    cpusPerTask = hw.cpusPerNode;
    jobName = unitName;

    # We need access to a fast shared filesystem to store the shuffled input
    # dataset
    extraMounts = [ fs.shared.fast ];
  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  exec = {nextStage, conf, ...}: with conf;
  let
    inputExp = inputTre.experiment;
    inputUnit = elemAt inputExp.units 0;
    unitName = baseNameOf (toString inputUnit);

    # We also need the result. This is only used to ensure that we have the
    # results, so it has been executed.
    inputRes = resultFromTrebuchet inputTre;

    #FIXME: We need a better mechanism to get the output paths
    inFile = "${fs.shared.fast}/out/$GARLIC_USER/${unitName}/1/seq.dat";
    outDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN";
    outFile = "${outDir}/shuffled.dat";

  in
    stages.exec {
      inherit nextStage;
      pre = ''
        # This line ensures that the previous results are complete:
        # ${inputRes}

        # Exit on error
        set -e

        # Ensure the input file exists
        if [ ! -f "${inFile}" ]; then
          echo "input file not found: ${inFile}"
          exit 1
        fi

        mkdir -p "${outDir}"

        # Copy the input as we are going to overwrite it
        cp "${inFile}" "${outFile}"
      '';
      argv = [ n dram outFile 16 64 ];
      post = ''
        # Link the output here
        ln -s "${outFile}" shuffled.dat
      '';
    };

  program = {...}:
    bsc.apps.bigsort.shuffle;

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in

stdexp.genExperiment { inherit configs pipeline; }
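The "# ${inputRes}" line in pre is what chains this stage to the previous experiment: interpolating the result derivation into the script text adds a store reference to it, so Nix has to realize the genseq results (that is, actually run that experiment) before this unit can even be built. A minimal standalone sketch of the same trick, where writeText comes from nixpkgs and inputRes stands for whatever resultFromTrebuchet returns:

  { pkgs, inputRes }:

  # Illustrative only: because inputRes is interpolated below, the generated
  # script depends on the result derivation and cannot exist before it does.
  pkgs.writeText "pre.sh" ''
    # This line ensures that the previous results are complete:
    # ${inputRes}
    set -e
  ''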
125  garlic/exp/bigsort/sort.nix  Normal file
@@ -0,0 +1,125 @@
{
  stdenv
, stdexp
, bsc
, targetMachine
, stages
, removeOutput ? true
, resultFromTrebuchet
, inputTre
}:

with stdenv.lib;

let
  varConf = { }; # Not used

  inherit (targetMachine) fs;

  # Generate the complete configuration for each unit
  genConf = with bsc; c: targetMachine.config // rec {
    expName = "bigsort";
    unitName = "${expName}.bs${toString bs}";
    inherit (targetMachine.config) hw;

    # bigsort options
    n = 1024 * 1024 * 1024 / 8; # In longs (?)
    bs = n; # In bytes
    pageSize = bs / 2; # In bytes (?)
    cc = bsc.icc;
    mpi = bsc.impi;
    gitBranch = "garlic/mpi+send+omp+task";

    # Repeat the execution of each unit 30 times
    loops = 1;

    # Resources
    qos = "debug";
    ntasksPerNode = 1;
    nodes = 1;
    time = "01:00:00";
    # All CPUs of the socket to each task
    cpusPerTask = hw.cpusPerSocket;
    jobName = "bigsort-${toString n}-${toString bs}-${gitBranch}";

    # Load the dataset from the same fs where it was stored in the shuffle
    # step. Also we use a local temp fs to store intermediate results.
    extraMounts = [ fs.shared.fast fs.local.temp ];

    rev = 1;
  };

  # Compute the array of configurations
  configs = stdexp.buildConfigs {
    inherit varConf genConf;
  };

  exec = {nextStage, conf, ...}: with conf;
  let
    inputExp = inputTre.experiment;
    unit = elemAt inputExp.units 0;
    expName = baseNameOf (toString inputExp);
    unitName = baseNameOf (toString unit);

    # We also need the result. This is only used to ensure that we have the
    # results, so it has been executed.
    inputRes = resultFromTrebuchet inputTre;

    #FIXME: We need a better mechanism to get the output paths
    inFile = "${fs.shared.fast}/out/$GARLIC_USER/${unitName}/1/shuffled.dat";
    outDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN";
    outFile = "${outDir}/sorted.dat";
    tmpDir = fs.local.temp;
  in
    stages.exec {
      inherit nextStage;
      pre = ''
        # This line ensures that the shuffled results are complete: nix needs to
        # compute the hash of the execution log to write the path here.
        # ${inputRes}

        # Exit on error
        set -e

        # Ensure the input file exists
        if [ ! -f "${inFile}" ]; then
          echo "input file not found: ${inFile}"
          exit 1
        fi

        # Create the output path
        mkdir -p ${outDir}

        # Verbose args:
        echo "INPUT = ${inFile}"
        echo "OUTPUT = ${outFile}"
        echo "TMPDIR = ${tmpDir}"
      '';

      argv = [ n bs inFile outFile tmpDir pageSize ];

      # Optionally remove the potentially large output dataset
      post = ''
        # Link the output here
        ln -s "${outFile}" sorted.dat
      '' + optionalString (removeOutput) ''
        # Remove the sorted output
        stat "${outFile}" > "${outFile}.stat"
        echo "file removed to save space" > "${outFile}"
      '';
    };

  program = {nextStage, conf, ...}: with conf;
    let
      customPkgs = stdexp.replaceMpi conf.mpi;
    in
      customPkgs.apps.bigsort.sort.override {
        inherit cc mpi gitBranch;
      };

  pipeline = stdexp.stdPipeline ++ [ exec program ];

in

#{ inherit configs pipeline; }
stdexp.genExperiment { inherit configs pipeline; }
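optionalString (from stdenv.lib, brought into scope at the top of the file) returns its string argument when the condition holds and the empty string otherwise, which is how the removal snippet is appended to post only when removeOutput is set. A minimal sketch of the same construction, evaluable with nix-instantiate --eval; the path below is a placeholder, not the real output location:

  let
    lib = (import <nixpkgs> { }).lib;
    removeOutput = true;
    outFile = "/fast/out/sorted.dat"; # placeholder path for illustration
  in ''
    ln -s "${outFile}" sorted.dat
  '' + lib.optionalString removeOutput ''
    echo "file removed to save space" > "${outFile}"
  ''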
20  overlay.nix
@@ -379,8 +379,24 @@ let
     test = callPackage ./garlic/exp/heat/test.nix { };
   };
 
-  bigsort = {
-    test = callPackage ./garlic/exp/bigsort/mpi+omp.nix { };
+  bigsort = rec {
+    genseq = callPackage ./garlic/exp/bigsort/genseq.nix {
+      n = toString (1024 * 1024 * 1024 / 8); # 1 GB input size
+      dram = toString (1024 * 1024 * 1024); # 1 GB chunk
+    };
+
+    shuffle = callPackage ./garlic/exp/bigsort/shuffle.nix {
+      inputTre = genseq;
+      n = toString (1024 * 1024 * 1024 / 8); # 1 GB input size
+      dram = toString (1024 * 1024 * 1024); # 1 GB chunk
+      inherit (bsc.garlic.pp) resultFromTrebuchet;
+    };
+
+    sort = callPackage ./garlic/exp/bigsort/sort.nix {
+      inputTre = shuffle;
+      inherit (bsc.garlic.pp) resultFromTrebuchet;
+      removeOutput = false;
+    };
   };
 
   slurm = {