diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix
index 02b5208..84ba4c7 100644
--- a/m/module/slurm-client.nix
+++ b/m/module/slurm-client.nix
@@ -1,33 +1,10 @@
-{ config, pkgs, lib, ... }:
+{ lib, ... }:
 
-let
-  suspendProgram = pkgs.writeScript "suspend.sh" ''
-    #!/usr/bin/env bash
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Shutting down host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
-    done
-  '';
+{
+  imports = [
+    ./slurm-common.nix
+  ];
 
-  resumeProgram = pkgs.writeScript "resume.sh" ''
-    #!/usr/bin/env bash
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Starting host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
-    done
-  '';
-
-in {
   systemd.services.slurmd.serviceConfig = {
     # Kill all processes in the control group on stop/restart. This will kill
     # all the jobs running, so ensure that we only upgrade when the nodes are
@@ -37,90 +14,5 @@ in {
     KillMode = lib.mkForce "control-group";
   };
 
-  services.slurm = {
-    client.enable = true;
-    controlMachine = "apex";
-    clusterName = "jungle";
-    nodeName = [
-      "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
-      "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
-    ];
-
-    partitionName = [
-      "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-    ];
-
-    # See slurm.conf(5) for more details about these options.
-    extraConfig = ''
-      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
-      # not with Intel MPI. For that use the compatibility shim libpmi.so
-      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
-      # library in SLURM (--mpi=pmix). See more details here:
-      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
-      MpiDefault=pmix
-
-      # When a node reboots return that node to the slurm queue as soon as it
-      # becomes operative again.
-      ReturnToService=2
-
-      # Track all processes by using a cgroup
-      ProctrackType=proctrack/cgroup
-
-      # Enable task/affinity to allow the jobs to run in a specified subset of
-      # the resources. Use the task/cgroup plugin to enable process containment.
-      TaskPlugin=task/affinity,task/cgroup
-
-      # Power off unused nodes until they are requested
-      SuspendProgram=${suspendProgram}
-      SuspendTimeout=60
-      ResumeProgram=${resumeProgram}
-      ResumeTimeout=300
-      SuspendExcNodes=hut
-
-      # Turn the nodes off after 1 hour of inactivity
-      SuspendTime=3600
-
-      # Reduce port range so we can allow only this range in the firewall
-      SrunPortRange=60000-61000
-
-      # Use cores as consumable resources. In SLURM terms, a core may have
-      # multiple hardware threads (or CPUs).
-      SelectType=select/cons_tres
-
-      # Ignore memory constraints and only use unused cores to share a node with
-      # other jobs.
-      SelectTypeParameters=CR_Core
-
-      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
-      # This sets up the "extern" step into which ssh-launched processes will be
-      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
-      # when a task runs (srun) so we can ssh early.
-      PrologFlags=Alloc,Contain,X11
-
-      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
-      # adopted by the external step, similar to tasks running in regular steps
-      # LaunchParameters=ulimit_pam_adopt
-      SlurmdDebug=debug5
-      #DebugFlags=Protocol,Cgroup
-    '';
-
-    extraCgroupConfig = ''
-      CgroupPlugin=cgroup/v2
-      #ConstrainCores=yes
-    '';
-  };
-
-  # Place the slurm config in /etc as this will be required by PAM
-  environment.etc.slurm.source = config.services.slurm.etcSlurm;
-
-  age.secrets.mungeKey = {
-    file = ../../secrets/munge-key.age;
-    owner = "munge";
-    group = "munge";
-  };
-
-  services.munge = {
-    enable = true;
-    password = config.age.secrets.mungeKey.path;
-  };
+  services.slurm.client.enable = true;
 }
diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix
new file mode 100644
index 0000000..6af746f
--- /dev/null
+++ b/m/module/slurm-common.nix
@@ -0,0 +1,116 @@
+{ config, pkgs, ... }:
+
+let
+  suspendProgram = pkgs.writeScript "suspend.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames $1)
+    for host in $hosts; do
+      echo Shutting down host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
+    done
+  '';
+
+  resumeProgram = pkgs.writeScript "resume.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames $1)
+    for host in $hosts; do
+      echo Starting host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
+    done
+  '';
+
+in {
+  services.slurm = {
+    controlMachine = "apex";
+    clusterName = "jungle";
+    nodeName = [
+      "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
+      "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
+    ];
+
+    partitionName = [
+      "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
+    ];
+
+    # See slurm.conf(5) for more details about these options.
+    extraConfig = ''
+      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
+      # not with Intel MPI. For that use the compatibility shim libpmi.so
+      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
+      # library in SLURM (--mpi=pmix). See more details here:
+      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
+      MpiDefault=pmix
+
+      # When a node reboots return that node to the slurm queue as soon as it
+      # becomes operative again.
+      ReturnToService=2
+
+      # Track all processes by using a cgroup
+      ProctrackType=proctrack/cgroup
+
+      # Enable task/affinity to allow the jobs to run in a specified subset of
+      # the resources. Use the task/cgroup plugin to enable process containment.
+      TaskPlugin=task/affinity,task/cgroup
+
+      # Power off unused nodes until they are requested
+      SuspendProgram=${suspendProgram}
+      SuspendTimeout=60
+      ResumeProgram=${resumeProgram}
+      ResumeTimeout=300
+      SuspendExcNodes=hut
+
+      # Turn the nodes off after 1 hour of inactivity
+      SuspendTime=3600
+
+      # Reduce port range so we can allow only this range in the firewall
+      SrunPortRange=60000-61000
+
+      # Use cores as consumable resources. In SLURM terms, a core may have
+      # multiple hardware threads (or CPUs).
+      SelectType=select/cons_tres
+
+      # Ignore memory constraints and only use unused cores to share a node with
+      # other jobs.
+      SelectTypeParameters=CR_Core
+
+      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
+      # This sets up the "extern" step into which ssh-launched processes will be
+      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
+      # when a task runs (srun) so we can ssh early.
+      PrologFlags=Alloc,Contain,X11
+
+      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
+      # adopted by the external step, similar to tasks running in regular steps
+      # LaunchParameters=ulimit_pam_adopt
+      SlurmdDebug=debug5
+      #DebugFlags=Protocol,Cgroup
+    '';
+
+    extraCgroupConfig = ''
+      CgroupPlugin=cgroup/v2
+      #ConstrainCores=yes
+    '';
+  };
+
+  # Place the slurm config in /etc as this will be required by PAM
+  environment.etc.slurm.source = config.services.slurm.etcSlurm;
+
+  age.secrets.mungeKey = {
+    file = ../../secrets/munge-key.age;
+    owner = "munge";
+    group = "munge";
+  };
+
+  services.munge = {
+    enable = true;
+    password = config.age.secrets.mungeKey.path;
+  };
+}
diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix
index e7fab8b..f84085d 100644
--- a/m/module/slurm-server.nix
+++ b/m/module/slurm-server.nix
@@ -1,7 +1,9 @@
 { ... }:
 
 {
-  services.slurm = {
-    server.enable = true;
-  };
+  imports = [
+    ./slurm-common.nix
+  ];
+
+  services.slurm.server.enable = true;
 }
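
For context beyond the patch itself: after this split a machine imports exactly one leaf module, and the shared SLURM settings arrive through slurm-common.nix. A minimal sketch of a worker host, assuming a layout like m/owl1/configuration.nix next to m/module/ (the host path is an assumption, not a file touched by this diff):

{ ... }:
{
  # Hypothetical worker host (e.g. owl1). Importing slurm-client.nix enables
  # slurmd and, through its imports list, pulls in slurm-common.nix for the
  # shared cluster, partition, power-saving and munge configuration.
  imports = [
    ../module/slurm-client.nix
  ];
}

The controller is symmetric: it imports ../module/slurm-server.nix instead, which enables slurmctld and reuses the same common module, so the generated slurm.conf comes from a single definition on every machine.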
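
The SrunPortRange comment in the common module notes that the firewall only needs to allow that range. This diff does not touch the firewall, but a minimal sketch of a matching rule with the standard NixOS options (where such a rule lives in this repo is an assumption) would look like:

{ ... }:
{
  # Hypothetical firewall fragment matching SrunPortRange=60000-61000 above,
  # so srun's dynamically chosen ports are reachable between nodes.
  networking.firewall.allowedTCPPortRanges = [
    { from = 60000; to = 61000; }
  ];
}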