Reject SSH connections without SLURM allocation

Rodrigo Arias 2025-02-13 14:47:38 +01:00
parent c32c1bd03b
commit 581efb4312
3 changed files with 64 additions and 0 deletions

View File

@ -56,4 +56,20 @@
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation on the node
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it the last rule
};
# Disable the systemd session (pam_systemd.so), as it conflicts with the
# pam_slurm_adopt.so module: the shell is first adopted into the slurmstepd
# task and then into a systemd session, so it lingers even after all jobs
# are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}
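
With these options the NixOS PAM module should render an account stanza in
/etc/pam.d/sshd roughly like the following (a sketch; the store hash is
build-specific and elided):

    account required /nix/store/…-slurm/lib/security/pam_slurm_adopt.so log_level=debug5

Since the control is "required" and the rule is ordered last, it can veto a
login but never skips the account checks that precede it.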

View File

@ -27,6 +27,22 @@ let
done
'';
prolog = pkgs.writeScript "prolog.sh" ''
#!/usr/bin/env bash
echo "hello from the prolog"
exit 0
'';
epilog = pkgs.writeScript "epilog.sh" ''
#!/usr/bin/env bash
echo "hello from the epilog"
exit 0
'';
in {
systemd.services.slurmd.serviceConfig = {
# Kill all processes in the control group on stop/restart. This will kill
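
The prolog and epilog scripts above are only defined in this hunk; the lines
that wire them into slurm.conf fall outside the visible context. With the
NixOS slurm module this would typically be a pair of extraConfig lines such
as (a sketch, not shown in this diff):

    Prolog=${prolog}
    Epilog=${epilog}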
@ -93,9 +109,29 @@ in {
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# Contain sets up the "extern" step into which ssh-launched processes are
# adopted; Alloc runs the prolog at job allocation (salloc) rather than at
# first task launch (srun), so we can ssh in early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";

View File

@ -39,6 +39,18 @@ final: prev:
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
./slurm-rank-expansion.patch
];
# Also build and install the pam_slurm_adopt module, used to deny access to
# users who have no job allocated on the node.
postBuild = (old.postBuild or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security"
popd
'';
postInstall = (old.postInstall or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security" install
popd
'';
});
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
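
The pam_slurm_adopt sources live under contribs/, which the default make
targets do not build; hence the explicit make in postBuild and make install
in postInstall, both pointed at $out/lib/security through PAM_DIR. A quick
check that the override took effect (the flake attribute is hypothetical):

    $ nix build .#slurm
    $ ls result/lib/security/    # should list pam_slurm_adopt.so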