{ ... }:

{
  services.slurm = {
    client.enable = true;
    controlMachine = "hut";
    clusterName = "jungle";
    nodeName = [
      "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
      "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
    ];
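
    # With the node definitions above, each node exposes
    # 2 sockets * 14 cores * 2 threads = 56 logical CPUs to SLURM.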

    # See slurm.conf(5) for more details about these options.
    extraConfig = ''
      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
      # not with Intel MPI. For that, use the compatibility shim libpmi.so by
      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while keeping the PMIx
      # library in SLURM (--mpi=pmix). See more details here:
      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
      MpiDefault=pmix
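
      # A sketch of the Intel MPI case described above (the application name
      # ./app is just a placeholder):
      #
      #   export I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so
      #   srun --mpi=pmix ./app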

      # When a node reboots, return that node to the slurm queue as soon as it
      # becomes operational again.
      ReturnToService=2

      # Track all processes by using a cgroup
      ProctrackType=proctrack/cgroup

      # Enable task/affinity to allow the jobs to run in a specified subset of
      # the resources. Use the task/cgroup plugin to enable process containment.
      TaskPlugin=task/affinity,task/cgroup
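
      # With task/affinity enabled, a step can be pinned to cores, e.g.
      # (illustrative only):
      #   srun --cpu-bind=cores ./app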
    '';
  };
}