Split slurm configuration for client and server
Reviewed-by: Aleix Boné <abonerib@bsc.es>
parent ee895d2e4f
commit f569933732
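The change keeps all cluster-wide Slurm settings in one shared module and reduces each role module to little more than an import plus its enable flag. A minimal sketch of the resulting layout (the client and server file paths are not shown in this excerpt, so the names below are illustrative); the NixOS module system then merges the options from slurm-common.nix into whichever host imports a role module:

    # Client role module (illustrative name, e.g. slurm-client.nix).
    # The real module also keeps a systemd override for slurmd, shown in the diff below.
    { lib, ... }:
    {
      imports = [ ./slurm-common.nix ];
      services.slurm.client.enable = true;   # runs slurmd on the compute nodes
    }

    # Server role module (illustrative name, e.g. slurm-server.nix):
    { ... }:
    {
      imports = [ ./slurm-common.nix ];
      services.slurm.server.enable = true;   # runs slurmctld on the control machine
    }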
@@ -1,33 +1,10 @@
-{ config, pkgs, lib, ... }:
+{ lib, ... }:
 
-let
-  suspendProgram = pkgs.writeScript "suspend.sh" ''
-    #!/usr/bin/env bash
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Shutting down host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
-    done
-  '';
-
-  resumeProgram = pkgs.writeScript "resume.sh" ''
-    #!/usr/bin/env bash
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Starting host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
-    done
-  '';
-
-in {
+{
+  imports = [
+    ./slurm-common.nix
+  ];
+
   systemd.services.slurmd.serviceConfig = {
     # Kill all processes in the control group on stop/restart. This will kill
     # all the jobs running, so ensure that we only upgrade when the nodes are
@@ -37,90 +14,5 @@ in {
     KillMode = lib.mkForce "control-group";
   };
 
-  services.slurm = {
-    client.enable = true;
-    controlMachine = "apex";
-    clusterName = "jungle";
-    nodeName = [
-      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
-      "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
-    ];
-
-    partitionName = [
-      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-    ];
-
-    # See slurm.conf(5) for more details about these options.
-    extraConfig = ''
-      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
-      # not with Intel MPI. For that use the compatibility shim libpmi.so
-      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
-      # library in SLURM (--mpi=pmix). See more details here:
-      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
-      MpiDefault=pmix
-
-      # When a node reboots return that node to the slurm queue as soon as it
-      # becomes operative again.
-      ReturnToService=2
-
-      # Track all processes by using a cgroup
-      ProctrackType=proctrack/cgroup
-
-      # Enable task/affinity to allow the jobs to run in a specified subset of
-      # the resources. Use the task/cgroup plugin to enable process containment.
-      TaskPlugin=task/affinity,task/cgroup
-
-      # Power off unused nodes until they are requested
-      SuspendProgram=${suspendProgram}
-      SuspendTimeout=60
-      ResumeProgram=${resumeProgram}
-      ResumeTimeout=300
-      SuspendExcNodes=hut
-
-      # Turn the nodes off after 1 hour of inactivity
-      SuspendTime=3600
-
-      # Reduce port range so we can allow only this range in the firewall
-      SrunPortRange=60000-61000
-
-      # Use cores as consumable resources. In SLURM terms, a core may have
-      # multiple hardware threads (or CPUs).
-      SelectType=select/cons_tres
-
-      # Ignore memory constraints and only use unused cores to share a node with
-      # other jobs.
-      SelectTypeParameters=CR_Core
-
-      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
-      # This sets up the "extern" step into which ssh-launched processes will be
-      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
-      # when a task runs (srun) so we can ssh early.
-      PrologFlags=Alloc,Contain,X11
-
-      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
-      # adopted by the external step, similar to tasks running in regular steps
-      # LaunchParameters=ulimit_pam_adopt
-      SlurmdDebug=debug5
-      #DebugFlags=Protocol,Cgroup
-    '';
-
-    extraCgroupConfig = ''
-      CgroupPlugin=cgroup/v2
-      #ConstrainCores=yes
-    '';
-  };
-
-  # Place the slurm config in /etc as this will be required by PAM
-  environment.etc.slurm.source = config.services.slurm.etcSlurm;
-
-  age.secrets.mungeKey = {
-    file = ../../secrets/munge-key.age;
-    owner = "munge";
-    group = "munge";
-  };
-
-  services.munge = {
-    enable = true;
-    password = config.age.secrets.mungeKey.path;
-  };
+  services.slurm.client.enable = true;
 }
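In the trimmed client module, lib is kept only for the mkForce on KillMode: the option is presumably already defined elsewhere (the upstream NixOS Slurm module, which avoids killing running jobs when slurmd restarts), and mkForce raises the priority of this definition so it wins the merge. A small illustration of the mechanism:

    { lib, ... }:
    {
      # Module-system priorities: lib.mkDefault is 1000, an ordinary definition
      # is 100, lib.mkForce is 50; the lowest number wins, so the forced value
      # replaces whatever another module sets for the same key.
      systemd.services.slurmd.serviceConfig.KillMode = lib.mkForce "control-group";
    }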
m/module/slurm-common.nix (new file, 116 lines)
@@ -0,0 +1,116 @@
+{ config, pkgs, ... }:
+
+let
+  suspendProgram = pkgs.writeScript "suspend.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames $1)
+    for host in $hosts; do
+      echo Shutting down host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
+    done
+  '';
+
+  resumeProgram = pkgs.writeScript "resume.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames $1)
+    for host in $hosts; do
+      echo Starting host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
+    done
+  '';
+
+in {
+  services.slurm = {
+    controlMachine = "apex";
+    clusterName = "jungle";
+    nodeName = [
+      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
+      "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
+    ];
+
+    partitionName = [
+      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
+    ];
+
+    # See slurm.conf(5) for more details about these options.
+    extraConfig = ''
+      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
+      # not with Intel MPI. For that use the compatibility shim libpmi.so
+      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
+      # library in SLURM (--mpi=pmix). See more details here:
+      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
+      MpiDefault=pmix
+
+      # When a node reboots return that node to the slurm queue as soon as it
+      # becomes operative again.
+      ReturnToService=2
+
+      # Track all processes by using a cgroup
+      ProctrackType=proctrack/cgroup
+
+      # Enable task/affinity to allow the jobs to run in a specified subset of
+      # the resources. Use the task/cgroup plugin to enable process containment.
+      TaskPlugin=task/affinity,task/cgroup
+
+      # Power off unused nodes until they are requested
+      SuspendProgram=${suspendProgram}
+      SuspendTimeout=60
+      ResumeProgram=${resumeProgram}
+      ResumeTimeout=300
+      SuspendExcNodes=hut
+
+      # Turn the nodes off after 1 hour of inactivity
+      SuspendTime=3600
+
+      # Reduce port range so we can allow only this range in the firewall
+      SrunPortRange=60000-61000
+
+      # Use cores as consumable resources. In SLURM terms, a core may have
+      # multiple hardware threads (or CPUs).
+      SelectType=select/cons_tres
+
+      # Ignore memory constraints and only use unused cores to share a node with
+      # other jobs.
+      SelectTypeParameters=CR_Core
+
+      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
+      # This sets up the "extern" step into which ssh-launched processes will be
+      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
+      # when a task runs (srun) so we can ssh early.
+      PrologFlags=Alloc,Contain,X11
+
+      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
+      # adopted by the external step, similar to tasks running in regular steps
+      # LaunchParameters=ulimit_pam_adopt
+      SlurmdDebug=debug5
+      #DebugFlags=Protocol,Cgroup
+    '';
+
+    extraCgroupConfig = ''
+      CgroupPlugin=cgroup/v2
+      #ConstrainCores=yes
+    '';
+  };
+
+  # Place the slurm config in /etc as this will be required by PAM
+  environment.etc.slurm.source = config.services.slurm.etcSlurm;
+
+  age.secrets.mungeKey = {
+    file = ../../secrets/munge-key.age;
+    owner = "munge";
+    group = "munge";
+  };
+
+  services.munge = {
+    enable = true;
+    password = config.age.secrets.mungeKey.path;
+  };
+}
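slurmctld invokes SuspendProgram and ResumeProgram with a hostlist expression (for example owl[1-2]) as the first argument; the scripts expand it with scontrol show hostnames and then power the nodes off or on through their IPMI interfaces. The two scripts differ only in the chassis power action and the log text (resume.sh still logs "Suspend invoked"). If that duplication ever becomes a nuisance, a hedged sketch of one way to factor it, not something this commit does:

    { pkgs, ... }:

    let
      # Hypothetical helper (not part of this commit): build both power-control
      # scripts from a log label and an ipmitool chassis power action.
      mkPowerScript = label: action: pkgs.writeScript "power-${action}.sh" ''
        #!/usr/bin/env bash
        exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
        set -x
        export "PATH=/run/current-system/sw/bin:$PATH"
        echo "$(date) ${label} invoked $0 $*"
        # slurmctld passes a hostlist expression such as owl[1-2]; expand it.
        hosts=$(scontrol show hostnames $1)
        for host in $hosts; do
          echo "${label} host: $host"
          ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power ${action}
        done
      '';

      suspendProgram = mkPowerScript "Suspend" "off";
      resumeProgram = mkPowerScript "Resume" "on";
    in {
      # suspendProgram and resumeProgram would then be interpolated into
      # extraConfig exactly as in slurm-common.nix above.
    }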
@@ -1,7 +1,9 @@
 { ... }:
 
 {
-  services.slurm = {
-    server.enable = true;
-  };
+  imports = [
+    ./slurm-common.nix
+  ];
+
+  services.slurm.server.enable = true;
 }
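The SrunPortRange comment in slurm-common.nix limits srun to 60000-61000 so that only that range has to be opened in the firewall, but the firewall configuration itself is not part of this commit. A hedged sketch of what such a rule could look like with the standard NixOS firewall options (the slurmd/slurmctld daemon ports would need their own rules, omitted here):

    { ... }:
    {
      # Assumption: this mirrors SrunPortRange=60000-61000 from slurm-common.nix;
      # adjust if that range changes.
      networking.firewall.allowedTCPPortRanges = [
        { from = 60000; to = 61000; }
      ];
    }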