diff --git a/m/common/slurm.nix b/m/common/slurm.nix index 08de3fd..b02a914 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -1,6 +1,14 @@ -{ ... }: +{ lib, ... }: { + systemd.services.slurmd.serviceConfig = { + # Kill every process in the slurmd control group on stop/restart. This + # also kills all running jobs, so only upgrade when the nodes are not in use. + # mkForce gives this definition priority (presumably over the module default). See: + # https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb + # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 + KillMode = lib.mkForce "control-group"; + }; services.slurm = { client.enable = true; controlMachine = "hut";