Enable memory limits in SLURM
Make sure that jobs cannot allocate more memory than is available, so we don't trigger the OOM killer. Fixes: #178
This commit is contained in:
@@ -5,8 +5,8 @@
 controlMachine = "apex";
 clusterName = "jungle";
 nodeName = [
-  "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
-  "fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
+  "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 MemSpecLimit=4096 RealMemory=128797"
+  "fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1 MemSpecLimit=4096 RealMemory=773659"
 ];

 partitionName = [
@@ -41,9 +41,9 @@
 # multiple hardware threads (or CPUs).
 SelectType=select/cons_tres

-# Ignore memory constraints and only use unused cores to share a node with
-# other jobs.
-SelectTypeParameters=CR_Core
+# Both cores and memory are consumable resources, so we can put a limit in
+# memory as well.
+SelectTypeParameters=CR_Core_Memory

 # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
 # This sets up the "extern" step into which ssh-launched processes will be
@@ -58,7 +58,9 @@

 extraCgroupConfig = ''
   CgroupPlugin=cgroup/v2
   #ConstrainCores=yes
+  ConstrainRAMSpace=yes
+  ConstrainSwapSpace=yes
   AllowedRAMSpace=99
 '';
};
|
||||
|
||||
Reference in New Issue
Block a user