hpcg: refactor ss and gen using a common file
- The file gen.nix now provides one experiment per unit, to reduce the evaluation time.
- The pipeline is specified only in the common.nix file.
- The input dataset path is no longer symlinked; it is passed in the "--load" argument.
- The size option is renamed from "n" to "sizePerTask".
This commit is contained in:
		
							parent
							
								
									9bb570af7f
								
							
						
					
					
						commit
						b4e37a15a9
					
				
							
								
								
									
										72
									
								
								garlic/exp/hpcg/common.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								garlic/exp/hpcg/common.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , stages | ||||
| , callPackage | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| 
 | ||||
| rec { | ||||
| 
 | ||||
|   checkInput = {nextStage, conf, ...}: stages.exec { | ||||
|     inherit nextStage; | ||||
|     pre = optionalString (! (conf.enableGen or false)) ( | ||||
|     let | ||||
|       gen = callPackage ./gen.nix { }; | ||||
|       inputTre = gen.getInputTre conf; | ||||
|       exp = inputTre.experiment; | ||||
|       unit = elemAt exp.units 0; | ||||
|       expName = baseNameOf (toString exp); | ||||
|       unitName = baseNameOf (toString unit); | ||||
|       inputPath = "$GARLIC_OUT/${expName}/${unitName}/1"; | ||||
|     in | ||||
|       '' | ||||
|         # Force the generation of the input resultTree as a dependency: | ||||
|         # ${toString inputTre.result} | ||||
| 
 | ||||
|         # Ensure the input dataset is still available | ||||
|         export HPCG_INPUT_PATH="${toString inputPath}" | ||||
| 
 | ||||
|         if [ ! -e "$HPCG_INPUT_PATH" ]; then | ||||
|           >&2 echo "Missing input dataset: $HPCG_INPUT_PATH" | ||||
|           exit 1 | ||||
|         fi | ||||
|       '' | ||||
|     ); | ||||
|   }; | ||||
| 
 | ||||
|   getSizePerTask = cpusPerTask: sizePerCpu: | ||||
|     mapAttrs (name: val: val * cpusPerTask) sizePerCpu; | ||||
| 
 | ||||
|   exec = {nextStage, conf, ...}: let | ||||
|     actionArg = if (conf.enableGen or false) | ||||
|     then "--store=." | ||||
|     else "--load=\"$HPCG_INPUT_PATH\""; | ||||
| 
 | ||||
|   in stages.exec { | ||||
|     inherit nextStage; | ||||
|     argv = [ | ||||
|       "--nx=${toString conf.sizePerTask.x}" | ||||
|       "--ny=${toString conf.sizePerTask.y}" | ||||
|       "--nz=${toString conf.sizePerTask.z}" | ||||
|       "--npx=${toString conf.nprocs.x}" | ||||
|       "--npy=${toString conf.nprocs.y}" | ||||
|       "--npz=${toString conf.nprocs.z}" | ||||
|       "--nblocks=${toString conf.nblocks}" | ||||
|       "--ncomms=${toString conf.ncomms}" | ||||
|       # Either store the generated dataset in "." (when conf.enableGen is set) | ||||
|       # or load it from $HPCG_INPUT_PATH, exported by the checkInput stage. | ||||
|       actionArg | ||||
|     ] ++ optional (conf.disableAspectRatio or false) "--no-ar=1"; | ||||
|   }; | ||||
| 
 | ||||
|   program = {nextStage, conf, ...}: bsc.apps.hpcg.override { | ||||
|     inherit (conf) gitBranch; | ||||
|   }; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ | ||||
|     checkInput | ||||
|     exec program ]; | ||||
| } | ||||
| @ -5,95 +5,45 @@ | ||||
| , targetMachine | ||||
| , stages | ||||
| , garlicTools | ||||
| , callPackage | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| with builtins; | ||||
| with garlicTools; | ||||
| 
 | ||||
| let | ||||
| rec { | ||||
| 
 | ||||
|   # Generate the complete configuration for each unit | ||||
|   genConf = c: targetMachine.config // rec { | ||||
|     expName = "${c.expName}.gen"; | ||||
|     unitName = "${expName}.n${toString n.x}"; | ||||
|     unitName = "${c.unitName}.gen"; | ||||
| 
 | ||||
|     inherit (targetMachine.config) hw; | ||||
| 
 | ||||
|     # Only the n and gitBranch options are inherited | ||||
|     inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch; | ||||
|     # Inherit options from the current conf | ||||
|     inherit (c) sizePerTask nprocs disableAspectRatio gitBranch | ||||
|       cpusPerTask ntasksPerNode nodes; | ||||
| 
 | ||||
|     # Repeat the execution of each unit 30 times | ||||
|     # nblocks and ncomms are ignored from c | ||||
|     ncomms = 1; | ||||
|     nblocks = 1; | ||||
| 
 | ||||
|     # We only need one run | ||||
|     loops = 1; | ||||
| 
 | ||||
|     # Generate the input | ||||
|     enableGen = true; | ||||
| 
 | ||||
|     # Resources | ||||
|     qos = "debug"; | ||||
|     # ntasksPerNode = hw.socketsPerNode; | ||||
|     # nodes = 2; | ||||
|     time = "00:30:00"; | ||||
|     # task in one socket | ||||
|     cpusPerTask = hw.cpusPerSocket; | ||||
|     time = "02:00:00"; | ||||
|     jobName = unitName; | ||||
|   }; | ||||
| 
 | ||||
|   exec = {nextStage, conf, ...}: with conf; stages.exec { | ||||
|     inherit nextStage; | ||||
|     argv = [ | ||||
|       "--nx=${toString conf.n.x}" | ||||
|       "--ny=${toString conf.n.y}" | ||||
|       "--nz=${toString conf.n.z}" | ||||
|       "--npx=${toString conf.nprocs.x}" | ||||
|       "--npy=${toString conf.nprocs.y}" | ||||
|       "--npz=${toString conf.nprocs.z}" | ||||
|       # nblocks and ncomms are ignored | ||||
|       "--nblocks=1" | ||||
|       "--ncomms=1" | ||||
|       # Store the results in the same directory | ||||
|       "--store=." | ||||
|     ] ++ optional (conf.disableAspectRatio) "--no-ar=1"; | ||||
|   common = callPackage ./common.nix {}; | ||||
| 
 | ||||
|   getInputTre = conf: stdexp.genExperiment { | ||||
|     configs = [ (genConf conf) ]; | ||||
|     pipeline = common.pipeline; | ||||
|   }; | ||||
| 
 | ||||
|   program = {nextStage, conf, ...}: bsc.apps.hpcg.override { | ||||
|     inherit (conf) gitBranch; | ||||
|   }; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ exec program ]; | ||||
| 
 | ||||
|   genExp = configs: stdexp.genExperiment { inherit configs pipeline; }; | ||||
|      | ||||
|   genInputLink = inputConfigs: {nextStage, conf, ...}: | ||||
|   let | ||||
|     # Compute the experiment that produces HPCG input matrix from the | ||||
|     # configuration of this unit: | ||||
|     configs = map genConf inputConfigs; | ||||
|     inputTre = genExp configs; | ||||
|     #inputExp = getExperimentStage inputTrebuchet; | ||||
|     #inputExp = trace inputTrebuchet inputTrebuchet.nextStage; | ||||
|     inputExp = getExperimentStage inputTre; | ||||
|     # Then load the result. This is only used to ensure that we have the | ||||
|     # results, so it has been executed. | ||||
|     inputRes = inputTre.result; | ||||
|     # We also need the unit, to compute the path. | ||||
|     inputUnit = stages.unit { | ||||
|       conf = genConf conf; | ||||
|       stages = pipeline; | ||||
|     }; | ||||
|     # Build the path: | ||||
|     expName = baseNameOf (toString inputExp); | ||||
|     unitName = baseNameOf (toString inputUnit); | ||||
|     relPath = "../../${expName}/${unitName}/1"; | ||||
|   in stages.exec { | ||||
|     inherit nextStage; | ||||
|     env = '' | ||||
|       # This line ensures that the results of the HPCG generation are complete: | ||||
|       # ${inputRes} | ||||
| 
 | ||||
|       # Then we simply link the input result directory in "input" | ||||
|       # We use || true because all ranks will execute this and | ||||
|       # the execution will fail | ||||
|       ln -sf ${relPath} input || true | ||||
|     ''; | ||||
|   }; | ||||
| 
 | ||||
| in | ||||
|   #{ inherit genConf genExp genInputLink; } | ||||
|   genInputLink | ||||
| } | ||||
|  | ||||
| @ -1,112 +0,0 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| , genInput | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| 
 | ||||
| let | ||||
|   # Initial variable configuration | ||||
|   varConf = { | ||||
|     n = [ | ||||
|         { x = 192 / 4; y = 192 / 4; z = 16 * 192; } | ||||
|     ]; | ||||
|     nprocs = [ | ||||
|         # { x = 2; y = 1; z = 1; } | ||||
|         # { x = 4; y = 1; z = 1; } | ||||
|         # { x = 8; y = 1; z = 1; } | ||||
|         # { x = 16; y = 1; z = 1; } | ||||
|         # { x = 32; y = 1; z = 1; } | ||||
| 
 | ||||
|         # { x = 1; y = 2; z = 1; } | ||||
|         # { x = 1; y = 4; z = 1; } | ||||
|         # { x = 1; y = 8; z = 1; } | ||||
|         # { x = 1; y = 16; z = 1; } | ||||
|         # { x = 1; y = 32; z = 1; } | ||||
| 
 | ||||
|         { x = 1; y = 1; z = 2; } | ||||
|         { x = 1; y = 1; z = 4; } | ||||
|         { x = 1; y = 1; z = 8; } | ||||
|         { x = 1; y = 1; z = 16; } | ||||
|         { x = 1; y = 1; z = 32; } | ||||
| 
 | ||||
|     ]; | ||||
|     # nblocks = [ 12 24 48 96 192 384 768 1536 ]; | ||||
|     nblocks = [ 24 48 96 192 384 ]; | ||||
|     ncommblocks = [ 1 ]; | ||||
|     # nodes = [ 1 ]; | ||||
|     # nodes = [ 1 2 4 8 16 ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Generate the complete configuration for each unit | ||||
|   genConf = c: targetMachine.config // rec { | ||||
|     expName = "hpcg.oss"; | ||||
|     unitName = "${expName}.nb${toString nblocks}"; | ||||
| 
 | ||||
|     inherit (targetMachine.config) hw; | ||||
| 
 | ||||
|     # hpcg options | ||||
|     inherit (c) nprocs nblocks ncommblocks; | ||||
| 
 | ||||
|     n = { | ||||
|         x = c.n.x / nprocs.x; | ||||
|         y = c.n.y / nprocs.y; | ||||
|         z = c.n.z / nprocs.z; | ||||
|     }; | ||||
| 
 | ||||
|     gitBranch = "garlic/tampi+isend+oss+task"; | ||||
| 
 | ||||
|     # Repeat the execution of each unit 30 times | ||||
|     loops = 10; | ||||
| 
 | ||||
|     disableAspectRatio = true; | ||||
| 
 | ||||
|     # Resources | ||||
|     qos = "debug"; | ||||
|     ntasksPerNode = hw.socketsPerNode; | ||||
|     time = "02:00:00"; | ||||
|     # task in one socket | ||||
|     cpusPerTask = hw.cpusPerSocket; | ||||
|     nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode; | ||||
|     jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}"; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
|   input = genInput configs; | ||||
| 
 | ||||
|   exec = {nextStage, conf, ...}: stages.exec { | ||||
|     inherit nextStage; | ||||
|     argv = [ | ||||
|       "--nx=${toString conf.n.x}" | ||||
|       "--ny=${toString conf.n.y}" | ||||
|       "--nz=${toString conf.n.z}" | ||||
|       # Distribute all processes in X axis | ||||
|       "--npx=${toString conf.nprocs.x}" | ||||
|       "--npy=${toString conf.nprocs.y}" | ||||
|       "--npz=${toString conf.nprocs.z}" | ||||
|       "--nblocks=${toString conf.nblocks}" | ||||
|       "--ncomms=${toString conf.ncommblocks}" | ||||
|       # The input symlink is generated by the input stage, which is generated by | ||||
|       # the genInput function. | ||||
|       "--load=input" | ||||
|       # Disable HPCG Aspect Ratio to run any mpi layout | ||||
|     ] ++ optional (conf.disableAspectRatio) "--no-ar=1"; | ||||
|   }; | ||||
| 
 | ||||
|   program = {nextStage, conf, ...}: bsc.apps.hpcg.override { | ||||
|     inherit (conf) gitBranch; | ||||
|   }; | ||||
| 
 | ||||
|   pipeline = stdexp.stdPipeline ++ [ input exec program ]; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
							
								
								
									
										68
									
								
								garlic/exp/hpcg/ss.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								garlic/exp/hpcg/ss.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,68 @@ | ||||
| { | ||||
|   stdenv | ||||
| , stdexp | ||||
| , bsc | ||||
| , targetMachine | ||||
| , stages | ||||
| , garlicTools | ||||
| , callPackage | ||||
| , enableExtended ? false | ||||
| }: | ||||
| 
 | ||||
| with stdenv.lib; | ||||
| with garlicTools; | ||||
| 
 | ||||
| let | ||||
|   common = callPackage ./common.nix { }; | ||||
| 
 | ||||
|   inherit (common) pipeline getSizePerTask; | ||||
| 
 | ||||
|   # Initial variable configuration | ||||
|   varConf = { | ||||
|     nodes = range2 1 16; | ||||
|     blocksPerCpu = if (enableExtended) | ||||
|       then range2 1 8 | ||||
|       else [ 4 ]; | ||||
|     gitBranch = [ | ||||
|       "garlic/tampi+isend+oss+task" | ||||
|     ]; | ||||
|   }; | ||||
| 
 | ||||
|   # Generate the complete configuration for each unit | ||||
|   genConf = c: targetMachine.config // rec { | ||||
|     expName = "hpcg-ss"; | ||||
|     unitName = "${expName}" | ||||
|     + "-nodes${toString nodes}" | ||||
|     + "-bpc${toString blocksPerCpu}"; | ||||
| 
 | ||||
|     inherit (targetMachine.config) hw; | ||||
| 
 | ||||
|     # hpcg options | ||||
|     inherit (c) nodes blocksPerCpu gitBranch; | ||||
|     totalTasks = ntasksPerNode * nodes; | ||||
|     sizePerCpu = { x=2; y=2; z=128 / totalTasks; }; | ||||
|     sizePerTask = getSizePerTask cpusPerTask sizePerCpu; | ||||
|     nprocs = { x=1; y=1; z=totalTasks; }; | ||||
|     nblocks = blocksPerCpu * cpusPerTask; | ||||
|     ncomms = 1; | ||||
|     disableAspectRatio = true; | ||||
| 
 | ||||
|     # Repeat the execution of each unit several times | ||||
|     loops = 10; | ||||
| 
 | ||||
|     # Resources | ||||
|     qos = "debug"; | ||||
|     time = "02:00:00"; | ||||
|     cpusPerTask = hw.cpusPerSocket; | ||||
|     ntasksPerNode = hw.socketsPerNode; | ||||
|     jobName = unitName; | ||||
|   }; | ||||
| 
 | ||||
|   # Compute the array of configurations | ||||
|   configs = stdexp.buildConfigs { | ||||
|     inherit varConf genConf; | ||||
|   }; | ||||
| 
 | ||||
| in | ||||
| 
 | ||||
|   stdexp.genExperiment { inherit configs pipeline; } | ||||
| @ -65,9 +65,9 @@ | ||||
|       inherit genInput; | ||||
|     }; | ||||
| 
 | ||||
|     ossSlicesStrongscaling = callPackage ./hpcg/oss.slices.strongscaling.nix { | ||||
|       inherit genInput; | ||||
|     }; | ||||
|     ss = callPackage ./hpcg/ss.nix { }; | ||||
| 
 | ||||
|     big.ss = ss.override { enableExtended = true; }; | ||||
|   }; | ||||
| 
 | ||||
|   heat = rec { | ||||
|  | ||||
		Reference in New Issue
	
	Block a user