forked from rarias/bscpkgs
		
	bigsort: add experiment with input generation
This commit is contained in:
		
							parent
							
								
									0bb5c76aad
								
							
						
					
					
						commit
						aca7e36fc7
					
				
							
								
								
									
										72
									
								
								garlic/exp/bigsort/genseq.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								garlic/exp/bigsort/genseq.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| , n # must be a string | ||||
| , dram # must be a string | ||||
| , strace | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| 
 | ||||
| # Ensure the arguments are strings, to avoid problems with large numbers | ||||
| assert (isString n); | ||||
| assert (isString dram); | ||||
| 
 | ||||
| let | ||||
|   # Initial variable configuration | ||||
|   varConf = with bsc; { }; | ||||
| 
 | ||||
|   inherit (targetMachine) fs; | ||||
| 
 | ||||
|   # Generate the complete configuration for each unit: the machine defaults | ||||
|   # from targetMachine.config, overridden by the attributes below. The | ||||
|   # argument `c` is not used, as varConf declares no variables. | ||||
|   genConf = with bsc; c: targetMachine.config // rec { | ||||
|     expName = "genseq"; | ||||
|     unitName = "${expName}.n${n}.dram${dram}"; | ||||
|     inherit (targetMachine.config) hw; | ||||
|     inherit n dram; | ||||
| 
 | ||||
|     # Don't repeat | ||||
|     loops = 1; | ||||
| 
 | ||||
|     # Resources | ||||
|     qos = "debug"; | ||||
|     ntasksPerNode = 1; | ||||
|     nodes = 1; | ||||
|     time = "01:00:00"; | ||||
|     cpusPerTask = hw.cpusPerNode; | ||||
|     jobName = unitName; | ||||
| 
 | ||||
|     # The exec stage writes the generated sequence under fs.shared.fast, so | ||||
|     # that filesystem has to be requested for the unit. | ||||
|     # NOTE(review): assumes extraMounts is honored here the same way as in | ||||
|     # the other bigsort experiments -- confirm. | ||||
|     extraMounts = [ fs.shared.fast ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
|   # Exec stage: calls stages.exec with n, dram and the output file as the | ||||
|   # program arguments, plus two shell fragments passed as `pre` and `post`. | ||||
|   exec = {nextStage, conf, ...}: with conf; | ||||
|   let | ||||
|     #FIXME: We need a better mechanism to get the output paths | ||||
|     runDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"; | ||||
|     seqFile = "${runDir}/seq.dat"; | ||||
| 
 | ||||
|     # Shell fragment given to stages.exec as `pre` | ||||
|     createRunDir = '' | ||||
|       mkdir -p "${runDir}" | ||||
|     ''; | ||||
| 
 | ||||
|     # Shell fragment given to stages.exec as `post` | ||||
|     linkResult = '' | ||||
|       # Link the output here | ||||
|       ln -s "${seqFile}" seq.dat | ||||
|     ''; | ||||
|   in | ||||
|     stages.exec { | ||||
|       inherit nextStage; | ||||
|       argv = [ n dram seqFile ]; | ||||
|       pre = createRunDir; | ||||
|       post = linkResult; | ||||
|     }; | ||||
| 
 | ||||
|   program = {...}: bsc.apps.bigsort.genseq; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ exec program ]; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
| @ -1,80 +0,0 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| 
 | ||||
| let | ||||
|   # Initial variable configuration | ||||
|   varConf = with bsc; { | ||||
|     n = [ 134217728 ]; | ||||
|     bs = [ 134217728 ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Generate the complete configuration for each unit, taking n and bs from | ||||
|   # the varConf combination `c` | ||||
|   genConf = with bsc; c: targetMachine.config // rec { | ||||
|     expName = "bigsort.mpi+omp"; | ||||
|     unitName = "${expName}.bs${toString bs}"; | ||||
|     inherit (targetMachine.config) hw; | ||||
| 
 | ||||
|     # bigsort options | ||||
|     n = c.n; | ||||
|     bs = c.bs; | ||||
|     cc = bsc.icc; | ||||
|     mpi = bsc.mpi; # TODO: Remove this for oss | ||||
|     gitBranch = "garlic/mpi+send+omp+task"; | ||||
| 
 | ||||
|     # Run each unit only once | ||||
|     loops = 1; | ||||
| 
 | ||||
|     # Resources | ||||
|     qos = "debug"; | ||||
|     ntasksPerNode = 1; | ||||
|     nodes = 1; | ||||
|     time = "01:00:00"; | ||||
|     # All CPUs of the socket to each task | ||||
|     cpusPerTask = hw.cpusPerSocket; | ||||
|     jobName = "bigsort-${toString n}-${toString bs}-${gitBranch}"; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
|   # input = genInput configs; | ||||
| 
 | ||||
|   exec = {nextStage, conf, ...}: with conf; stages.exec { | ||||
|     inherit nextStage; | ||||
|     #env = "NANOS6_DEPENDENCIES=discrete"; | ||||
|     argv = [ | ||||
|       "${toString n}" | ||||
|       "${toString bs}" | ||||
|       "/gpfs/scratch/bsc15/bsc15065/BigSort/1g_unsorted.dat" | ||||
|       "/gpfs/scratch/bsc15/bsc15065/BigSort/1g_sorted.dat" | ||||
|       "/gpfs/scratch/bsc15/bsc15065/BigSort/tmp" | ||||
|       #"${toString inputFile}" | ||||
|       #"${toString outputFile}" | ||||
|       #"$TMPDIR" | ||||
|       "${toString (builtins.div bs 2)}" | ||||
|     ]; | ||||
|   }; | ||||
| 
 | ||||
|   program = {nextStage, conf, ...}: with conf; | ||||
|   let | ||||
|     customPkgs = stdexp.replaceMpi conf.mpi; | ||||
|   in | ||||
|     customPkgs.apps.bigsort.override { | ||||
|       inherit cc gitBranch; | ||||
|     }; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ exec program ]; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   #{ inherit configs pipeline; } | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
							
								
								
									
										101
									
								
								garlic/exp/bigsort/shuffle.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								garlic/exp/bigsort/shuffle.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,101 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| , inputTre | ||||
| , n | ||||
| , dram | ||||
| , garlicTools | ||||
| , resultFromTrebuchet | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| with garlicTools; | ||||
| 
 | ||||
| let | ||||
|   # Initial variable configuration | ||||
|   varConf = with bsc; { }; | ||||
| 
 | ||||
|   inherit (targetMachine) fs; | ||||
| 
 | ||||
|   # Per-unit configuration: the machine defaults from targetMachine.config, | ||||
|   # overridden by the attributes below. The argument is ignored, since | ||||
|   # varConf declares no variables. | ||||
|   genConf = with bsc; _: targetMachine.config // rec { | ||||
|     inherit (targetMachine.config) hw; | ||||
|     inherit n dram; | ||||
| 
 | ||||
|     # Naming | ||||
|     expName = "shuffle"; | ||||
|     unitName = "${expName}.n${n}.dram${dram}"; | ||||
|     jobName = unitName; | ||||
| 
 | ||||
|     # Run the unit only once | ||||
|     loops = 1; | ||||
| 
 | ||||
|     # Resources: one task on one node, with all the CPUs of the node | ||||
|     qos = "debug"; | ||||
|     time = "01:00:00"; | ||||
|     nodes = 1; | ||||
|     ntasksPerNode = 1; | ||||
|     cpusPerTask = hw.cpusPerNode; | ||||
| 
 | ||||
|     # The shuffled input dataset is stored in a fast shared filesystem, so | ||||
|     # the unit needs access to it | ||||
|     extraMounts = [ fs.shared.fast ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
|   # Exec stage: copies the sequence produced by the input experiment | ||||
|   # (inputTre) to this run's output directory and runs the program on the | ||||
|   # copy. | ||||
|   exec = {nextStage, conf, ...}: with conf; | ||||
|   let | ||||
|     # Only the first unit of the input experiment is considered; the base | ||||
|     # name of its path selects the directory the input file is read from. | ||||
|     inputExp = inputTre.experiment; | ||||
|     inputUnit = elemAt inputExp.units 0; | ||||
|     unitName = baseNameOf (toString inputUnit); | ||||
| 
 | ||||
|     # We also need the result. This is only used to ensure that we have the | ||||
|     # results, so it has been executed. | ||||
|     inputRes = resultFromTrebuchet inputTre; | ||||
| 
 | ||||
|     #FIXME: We need a better mechanism to get the output paths | ||||
|     # The input path hard-codes run number 1 of the input unit. | ||||
|     inFile = "${fs.shared.fast}/out/$GARLIC_USER/${unitName}/1/seq.dat"; | ||||
|     outDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"; | ||||
|     outFile = "${outDir}/shuffled.dat"; | ||||
| 
 | ||||
|   in | ||||
|     stages.exec { | ||||
|       inherit nextStage; | ||||
|       # inputRes is interpolated inside a shell comment below: it has no | ||||
|       # effect on the script, but makes this stage reference the input | ||||
|       # results. | ||||
|       pre = '' | ||||
|         # This line ensures that the previous results are complete: | ||||
|         # ${inputRes} | ||||
| 
 | ||||
|         # Exit on error | ||||
|         set -e | ||||
| 
 | ||||
|         # Ensure the input file exists | ||||
|         if [ ! -f "${inFile}" ]; then | ||||
|           echo "input file not found: ${inFile}" | ||||
|           exit 1 | ||||
|         fi | ||||
| 
 | ||||
|         mkdir -p "${outDir}" | ||||
| 
 | ||||
|         # Copy the input as we are going to overwrite it | ||||
|         cp "${inFile}" "${outFile}" | ||||
|       ''; | ||||
|       # NOTE(review): the meaning of the trailing 16 and 64 arguments is not | ||||
|       # visible from this file -- TODO document them. | ||||
|       argv = [ n dram outFile 16 64 ]; | ||||
|       post = '' | ||||
|         # Link the output here | ||||
|         ln -s "${outFile}" shuffled.dat | ||||
|       ''; | ||||
|     }; | ||||
| 
 | ||||
|   program = {...}: | ||||
|     bsc.apps.bigsort.shuffle; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ exec program ]; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
							
								
								
									
										125
									
								
								garlic/exp/bigsort/sort.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								garlic/exp/bigsort/sort.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| , removeOutput ? true | ||||
| , resultFromTrebuchet | ||||
| , inputTre | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| 
 | ||||
| let | ||||
|   varConf = { }; # Not used | ||||
| 
 | ||||
|   inherit (targetMachine) fs; | ||||
| 
 | ||||
|   # Per-unit configuration: the machine defaults from targetMachine.config, | ||||
|   # overridden by the attributes below. The argument is ignored, since | ||||
|   # varConf is empty. | ||||
|   genConf = with bsc; _: targetMachine.config // rec { | ||||
|     inherit (targetMachine.config) hw; | ||||
| 
 | ||||
|     # Naming | ||||
|     expName = "bigsort"; | ||||
|     unitName = "${expName}.bs${toString bs}"; | ||||
|     jobName = "${expName}-${toString n}-${toString bs}-${gitBranch}"; | ||||
| 
 | ||||
|     # bigsort options | ||||
|     n = 1024 * 1024 * 1024 / 8; # In longs (?) | ||||
|     bs = n; # In bytes | ||||
|     pageSize = bs / 2; # In bytes (?) | ||||
| 
 | ||||
|     # Toolchain and source branch used to build the program | ||||
|     cc = bsc.icc; | ||||
|     mpi = bsc.impi; | ||||
|     gitBranch = "garlic/mpi+send+omp+task"; | ||||
|     rev = 1; | ||||
| 
 | ||||
|     # Run each unit only once | ||||
|     loops = 1; | ||||
| 
 | ||||
|     # Resources: one task on one node, with all the CPUs of a socket | ||||
|     qos = "debug"; | ||||
|     time = "01:00:00"; | ||||
|     nodes = 1; | ||||
|     ntasksPerNode = 1; | ||||
|     cpusPerTask = hw.cpusPerSocket; | ||||
| 
 | ||||
|     # Load the dataset from the same fs where it was stored in the shuffle | ||||
|     # step. Also we use a local temp fs to store intermediate results. | ||||
|     extraMounts = [ fs.shared.fast fs.local.temp ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
|   # Exec stage: runs the program on the shuffled dataset produced by the | ||||
|   # input experiment (inputTre) and writes the sorted output to this run's | ||||
|   # output directory. When removeOutput is set, the output file is replaced | ||||
|   # by a short placeholder after saving its `stat` information. | ||||
|   exec = {nextStage, conf, ...}: with conf; | ||||
|   let | ||||
|     # Only the first unit of the input experiment is considered; the base | ||||
|     # name of its path selects the directory the input file is read from. | ||||
|     inputExp = inputTre.experiment; | ||||
|     unit = elemAt inputExp.units 0; | ||||
|     unitName = baseNameOf (toString unit); | ||||
| 
 | ||||
|     # We also need the result. This is only used to ensure that we have the | ||||
|     # results, so it has been executed. | ||||
|     inputRes = resultFromTrebuchet inputTre; | ||||
| 
 | ||||
|     #FIXME: We need a better mechanism to get the output paths | ||||
|     # The input path hard-codes run number 1 of the input unit. | ||||
|     inFile = "${fs.shared.fast}/out/$GARLIC_USER/${unitName}/1/shuffled.dat"; | ||||
|     outDir = "${fs.shared.fast}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"; | ||||
|     outFile = "${outDir}/sorted.dat"; | ||||
|     tmpDir = fs.local.temp; | ||||
|   in | ||||
|     stages.exec { | ||||
|       inherit nextStage; | ||||
|       pre = '' | ||||
|         # This line ensures that the shuffled results are complete: nix needs to | ||||
|         # compute the hash of the execution log to write the path here. | ||||
|         # ${inputRes} | ||||
| 
 | ||||
|         # Exit on error | ||||
|         set -e | ||||
| 
 | ||||
|         # Ensure the input file exists | ||||
|         if [ ! -f "${inFile}" ]; then | ||||
|           echo "input file not found: ${inFile}" | ||||
|           exit 1 | ||||
|         fi | ||||
| 
 | ||||
|         # Create the output path | ||||
|         mkdir -p "${outDir}" | ||||
| 
 | ||||
|         # Verbose args: | ||||
|         echo "INPUT  = ${inFile}" | ||||
|         echo "OUTPUT = ${outFile}" | ||||
|         echo "TMPDIR = ${tmpDir}" | ||||
|       ''; | ||||
| 
 | ||||
|       # n, bs and pageSize are taken from the unit configuration (conf) | ||||
|       argv = [ n bs inFile outFile tmpDir pageSize ]; | ||||
| 
 | ||||
|       # Optionally remove the potentially large output dataset | ||||
|       post = '' | ||||
|         # Link the output here | ||||
|         ln -s "${outFile}" sorted.dat | ||||
|       '' + optionalString removeOutput '' | ||||
|         # Remove the sorted output | ||||
|         stat "${outFile}" > "${outFile}.stat" | ||||
|         echo "file removed to save space" > "${outFile}" | ||||
|       ''; | ||||
|     }; | ||||
| 
 | ||||
|   # Program stage: the bigsort sort application, taken from the package set | ||||
|   # returned by stdexp.replaceMpi for the unit's MPI and overridden with the | ||||
|   # unit's compiler, MPI and git branch. | ||||
|   program = {nextStage, conf, ...}: | ||||
|     (stdexp.replaceMpi conf.mpi).apps.bigsort.sort.override { | ||||
|       inherit (conf) cc mpi gitBranch; | ||||
|     }; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ exec program ]; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   #{ inherit configs pipeline; } | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
							
								
								
									
										20
									
								
								overlay.nix
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								overlay.nix
									
									
									
									
									
								
							| @ -379,8 +379,24 @@ let | ||||
|           test = callPackage ./garlic/exp/heat/test.nix { }; | ||||
|         }; | ||||
| 
 | ||||
| 	bigsort = { | ||||
| 	  test = callPackage ./garlic/exp/bigsort/mpi+omp.nix { }; | ||||
| 	bigsort = let | ||||
|           # Passed to both genseq and shuffle. They are strings, as genseq.nix | ||||
|           # asserts, to avoid problems with large numbers. | ||||
|           n = toString (1024 * 1024 * 1024 / 8); # 1 GB input size | ||||
|           dram = toString (1024 * 1024 * 1024); # 1 GB chunk | ||||
|         in rec { | ||||
|           # Step 1: generate the input sequence | ||||
|           genseq = callPackage ./garlic/exp/bigsort/genseq.nix { | ||||
|             inherit n dram; | ||||
|           }; | ||||
| 
 | ||||
|           # Step 2: shuffle the sequence generated by genseq | ||||
|           shuffle = callPackage ./garlic/exp/bigsort/shuffle.nix { | ||||
|             inherit n dram; | ||||
|             inherit (bsc.garlic.pp) resultFromTrebuchet; | ||||
|             inputTre = genseq; | ||||
|           }; | ||||
| 
 | ||||
|           # Step 3: sort the shuffled sequence, keeping the sorted output | ||||
|           sort = callPackage ./garlic/exp/bigsort/sort.nix { | ||||
|             inherit (bsc.garlic.pp) resultFromTrebuchet; | ||||
|             inputTre = shuffle; | ||||
|             removeOutput = false; | ||||
|           }; | ||||
| 	}; | ||||
| 
 | ||||
|         slurm = { | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user