Restart slurmd on failure #180

Manually merged
rarias merged 1 commit from restart-slurmd into master 2025-09-30 17:26:58 +02:00
Showing only changes of commit 79940876c3 - Show all commits

View File

@@ -12,6 +12,12 @@
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
KillMode = lib.mkForce "control-group";
# If slurmd cannot contact the control server, it fails and the node
# remains out of service until slurmd is restarted manually. Always try to
# restart it.
Restart = "always";
rarias marked this conversation as resolved Outdated

Are there any situations where we want a clean exit to happen or could we do always?

Are there any situations where we want a clean exit to happen or could we do `always`?
RestartSec = "30s";
};
services.slurm.client.enable = true;