7 Commits

Author SHA1 Message Date
69008f17fc Remove SLURM partition all
We no longer have homogeneous nodes so it doesn't make much sense to
allocate a mix of them.
2025-04-07 16:20:26 +02:00
bad08110fa Add varcila user to hut and fox 2025-04-07 16:20:26 +02:00
66704d682a Adjust fox slurm config after disabling SMT 2025-04-07 16:20:26 +02:00
80e7ecd6ea Add abonerib user to fox 2025-04-07 16:20:26 +02:00
4405d21dae Don't move doc in web output 2025-04-07 16:20:26 +02:00
eb09732100 Add quickstart guide 2025-04-07 16:20:26 +02:00
c1ca6e3e2c Reject SSH connections without SLURM allocation 2025-04-07 16:20:21 +02:00
8 changed files with 320 additions and 8 deletions

View File

@@ -81,7 +81,7 @@
home = "/home/Computational/abonerib";
description = "Aleix Boné";
group = "Computational";
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ];
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
@@ -126,6 +126,19 @@
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
];
};
varcila = {
uid = 5650;
isNormalUser = true;
home = "/home/Computational/varcila";
description = "Vincent Arcila";
group = "Computational";
hosts = [ "hut" "fox" ];
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
};
};
groups = {

View File

@@ -56,4 +56,20 @@
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@@ -12,6 +12,8 @@ let
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{

View File

@@ -43,14 +43,13 @@ in {
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=2 Feature=fox"
"fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox"
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
@@ -93,9 +92,29 @@ in {
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";

View File

@@ -39,6 +39,18 @@ final: prev:
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
./slurm-rank-expansion.patch
];
# Install also the pam_slurm_adopt library to restrict users from accessing
# nodes with no job allocated.
postBuild = (old.postBuild or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security"
popd
'';
postInstall = (old.postInstall or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security" install
popd
'';
});
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };

10
web/content/doc/_index.md Normal file
View File

@@ -0,0 +1,10 @@
---
title: "Docs"
description: "Documentation for users of jungle machines"
date: 2023-09-15
---
If this is the first time you use any of the jungle machines with NixOS, follow
the [quick start guide](quickstart).

View File

@@ -0,0 +1,234 @@
---
title: "Quick start"
date: 2023-09-15
---
This documentation will guide you on how to build custom packages of software
and use them in the jungle machines. It has been designed to reduce the friction
from users coming from module systems.
You should be able to access the jungle machines, otherwise [request
access](/access).
## Changes from other HPC machines
Users of other machines have been using the Lmod tool (module load ...) to add
or remove programs from their environment, as well as manually building their
own software for too many years.
While we cannot prevent users from continuing to use this tedious mechanism, we
have designed the jungle machines to be much easier to operate by using the nix
package manager.
### Freedom to install packages
When a user wanted to install a package, it was forced to either do it on its
own directory, or request a system administrator to install it in a shared
directory, so other users can also use that package.
This situation is gone, each user can install any package of software by
themselves, without requiring any other authorization. When two users request
the same package, the same copy will be provided.
A new package will be downloaded if it is available (someone already built it)
or will be built from source on demand.
### No changes over time
All users retain the same versions of the packages they request until they
decide to update them.
## Using nix to manage packages
In this chapter we show how to install packages and enter a development shell to
build new programs from source. The examples are done from the hut machine,
read [this page](/access) to request access.
### Installing binaries
To temporarily install new packages, use:
```text
hut% nix shell jungle#gcc jungle#cowsay jungle#ovni
```
Notice that the packages are described as two parts divided by the `#` symbol.
The first part defines where to take the package from and the second part is
the name of the package. For now we will use `jungle#<package>`. You can find
many more packages here:
<https://search.nixos.org/packages>
You will now enter a new shell, where those requested package **binaries are
available in $PATH**:
```text
hut% cowsay hello world
_____________
< hello world >
-------------
\ ^__^
\ (oo)\_______
(__)\ )\/\
||----w |
|| ||
hut% ovniver
LD_LIBRARY_PATH not set
libovni: build v1.11.0 (a7103f8), dynamic v1.11.0 (a7103f8)
hut% gcc --version
gcc (GCC) 13.3.0
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
```
### Building programs
The above method only loads new binaries in the `$PATH`. If we try to build a
program that includes headers or links with a library, it will fail to find
them:
```text
hut$ cat test.c
#include <ovni.h>
int main()
{
ovni_version_check();
return 0;
}
hut% gcc test.c -lovni -o test
test.c:1:10: fatal error: ovni.h: No such file or directory
1 | #include <ovni.h>
| ^~~~~~~~
compilation terminated.
```
We could manually add the full path to the ovni include directory with `-I` and
the libraries with `-L`, but there is a tool that already perform these steps
automatically for us, `nix develop`.
Let's go back to our original shell first, where those packages are not
available anymore:
```
hut% ps
PID TTY TIME CMD
2356260 pts/1 00:00:01 zsh
2457268 pts/1 00:00:00 zsh
2457297 pts/1 00:00:00 ps
hut% exit
hut% ovniver
ovniver: command not found
```
### Creating a flake.nix
To define which packages we want, we will write a small file that list them, a
flake.nix file.
First, we will create a new directory where we are going to be working:
```
hut% mkdir example
hut% cd exmple
```
Then place this flake.nix file:
```nix
{
inputs.jungle.url = "jungle";
outputs = { self, jungle }:
let
pkgs = jungle.outputs.packages.x86_64-linux;
in {
devShells.x86_64-linux.default = pkgs.mkShell {
pname = "devshell";
buildInputs = with pkgs; [
ovni gcc cowsay # more packages here...
];
};
};
}
```
Now enter the shell with:
```
hut% nix develop
warning: creating lock file '/home/Computational/rarias/example/flake.lock':
• Added input 'jungle':
'path:/nix/store/27srv8haj6vv4ywrbmw0a8vds561m8rq-source?lastModified=1739479441&narHash=sha256-Kgjs8SO1w9NbPBu8ghwzCxYJ9kvWpoQOT%2BXwPvA9DcU%3D&rev=76396c0d67ef0cf32377d5c1894bb695293bca9d' (2025-02-13)
• Added input 'jungle/agenix':
'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41?narHash=sha256-b%2Buqzj%2BWa6xgMS9aNbX4I%2BsXeb5biPDi39VgvSFqFvU%3D' (2024-08-10)
• Added input 'jungle/agenix/darwin':
'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d?narHash=sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0%3D' (2023-11-24)
• Added input 'jungle/agenix/darwin/nixpkgs':
follows 'jungle/agenix/nixpkgs'
• Added input 'jungle/agenix/home-manager':
'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1?narHash=sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE%3D' (2023-12-20)
• Added input 'jungle/agenix/home-manager/nixpkgs':
follows 'jungle/agenix/nixpkgs'
• Added input 'jungle/agenix/nixpkgs':
follows 'jungle/nixpkgs'
• Added input 'jungle/agenix/systems':
'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e?narHash=sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768%3D' (2023-04-09)
• Added input 'jungle/bscpkgs':
'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29)
• Added input 'jungle/bscpkgs/nixpkgs':
follows 'jungle/nixpkgs'
• Added input 'jungle/nixpkgs':
'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc?narHash=sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8%3D' (2025-01-14)
hut$
```
Notice that long list of messages is Nix creating a new flake.lock file with the
current state of the packages. Next invocations will use the same packages as
described by the lock file.
### Building a program from nix develop
Now let's try again building our test program:
```text
hut$ cat test.c
#include <ovni.h>
int main()
{
ovni_version_check();
return 0;
}
hut$ gcc test.c -o test -lovni
hut$ ldd test
linux-vdso.so.1 (0x00007ffff7fc4000)
libovni.so.1 => /nix/store/sqk972akjv0q8dchn8ccjln2llzyyfd0-ovni-1.11.0/lib/libovni.so.1 (0x00007ffff7fab000)
libc.so.6 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/libc.so.6 (0x00007ffff7db2000)
/nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/ld-linux-x86-64.so.2 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib64/ld-linux-x86-64.so.2 (0x00007ffff7fc6000)
hut$ ./test
```
Now the ovni.h header and the libovni library are found and the program is
successfully built, linked and executed.
You can add more packages as needed in your flake.nix:
```nix
buildInputs = with pkgs; [
ovni gcc cowsay # more packages here...
];
```
Make sure you exit the develop shell first, and then enter again with `nix
develop`.
## Remember
- `nix shell` places binaries in the `$PATH`.
- `nix develop` enters a development shell where both binaries and the libraries
and includes are available so you can build new programs.

View File

@@ -3,32 +3,38 @@ languageCode = 'en-us'
title = 'The jungle'
theme = 'PaperMod'
[[menu.main]]
identifier = "doc"
name = "Docs"
url = "/doc/"
weight = 10
[[menu.main]]
identifier = "grafana"
name = "Grafana"
url = "/grafana/"
weight = 10
weight = 20
[[menu.main]]
identifier = "Git"
name = "Git"
url = "/git/"
weight = 20
weight = 30
[[menu.main]]
identifier = "Lists"
name = "Lists"
url = "/lists/"
weight = 30
weight = 40
[[menu.main]]
identifier = "Paste"
name = "Paste"
url = "/paste/"
weight = 40
weight = 50
[[menu.main]]
identifier = "Posts"
name = "Posts"
url = "/posts/"
weight = 50
weight = 60