2 Commits

Author SHA1 Message Date
541c16cf44 Enable pam_slurm_adopt in all compute nodes
All checks were successful
CI / build:cross (pull_request) Successful in 6s
CI / build:all (pull_request) Successful in 20s
Prevents access to owl1 and owl2 too if the user doesn't have any jobs
running there.
2025-10-31 11:27:44 +01:00
018d94bd77 Don't suspend owl compute nodes
Currently the owl nodes are located at the top of the rack, and turning them
off causes a large temperature increase in that region, which accumulates
heat from the whole rack. To maximize airflow we will leave them on at
all times. This also makes allocations immediate, at the extra cost of
around 200 W.

In the future, if we include more nodes in SLURM we can configure those
to turn off if needed.

Fixes: #156
2025-10-31 11:27:44 +01:00
33 changed files with 48 additions and 264 deletions

8
flake.lock generated
View File

@@ -2,16 +2,16 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1767634882,
"narHash": "sha256-2GffSfQxe3sedHzK+sTKlYo/NTIAGzbFCIsNMUPAAnk=",
"lastModified": 1752436162,
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "3c9db02515ef1d9b6b709fc60ba9a540957f661c",
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.11",
"ref": "nixos-25.05",
"repo": "nixpkgs",
"type": "github"
}

View File

@@ -1,6 +1,6 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
};
outputs = { self, nixpkgs, ... }:

View File

@@ -57,18 +57,6 @@
};
};
services.fail2ban = {
enable = true;
maxretry = 5;
bantime-increment = {
enable = true; # Double ban time on each attack
maxtime = "7d"; # Ban up to a week
};
};
# Disable SSH login with password, allow only keypair
services.openssh.settings.PasswordAuthentication = false;
networking.firewall = {
extraCommands = ''
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our

View File

@@ -24,7 +24,7 @@
address = "10.0.40.40";
prefixLength = 24;
} ];
interfaces.ibs785.ipv4.addresses = [ {
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.40";
prefixLength = 24;
} ];

View File

@@ -1,10 +1,10 @@
{ pkgs, ... }:
{ pkgs, config, ... }:
{
environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu perf ldns pv
ncdu config.boot.kernelPackages.perf ldns pv
# From jungle overlay
osumb nixgen
];

View File

@@ -139,7 +139,6 @@
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
shell = pkgs.zsh;
};
pmartin1 = {

View File

@@ -5,5 +5,5 @@
boot.kernelModules = [ "ipmi_watchdog" ];
# Enable systemd watchdog with 30 s interval
systemd.settings.Manager.RuntimeWatchdogSec = 30;
systemd.watchdog.runtimeTime = "30s";
}

View File

@@ -1,6 +1,11 @@
{ pkgs, lib, ... }:
{ config, pkgs, lib, ... }:
{
# add the perf tool
environment.systemPackages = with pkgs; [
config.boot.kernelPackages.perf
];
# allow non-root users to read tracing data from the kernel
boot.kernel.sysctl."kernel.perf_event_paranoid" = -2;
boot.kernel.sysctl."kernel.kptr_restrict" = 0;

View File

@@ -45,7 +45,7 @@
address = "10.0.40.7";
prefixLength = 24;
} ];
interfaces.ibs785.ipv4.addresses = [ {
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.7";
prefixLength = 24;
} ];

View File

@@ -46,7 +46,7 @@
address = "10.0.40.42";
prefixLength = 24;
} ];
interfaces.ibs785.ipv4.addresses = [ {
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.42";
prefixLength = 24;
} ];

View File

@@ -1,10 +1,3 @@
{
services.nixseparatedebuginfod2 = {
enable = true;
substituters = [
"local:"
"https://cache.nixos.org"
"http://hut/cache"
];
};
services.nixseparatedebuginfod.enable = true;
}

View File

@@ -20,7 +20,7 @@
address = "10.0.40.1";
prefixLength = 24;
} ];
interfaces.ibs785.ipv4.addresses = [ {
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.1";
prefixLength = 24;
} ];

View File

@@ -21,7 +21,7 @@
prefixLength = 24;
} ];
# Watch out! The OmniPath device is not in the same place here:
interfaces.ibs801.ipv4.addresses = [ {
interfaces.ibp129s0.ipv4.addresses = [ {
address = "10.0.42.2";
prefixLength = 24;
} ];

View File

@@ -27,7 +27,4 @@
};
};
};
# Allow gitea user to send mail
users.users.gitea.extraGroups = [ "mail-robot" ];
}

View File

@@ -30,8 +30,7 @@ let
amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { };
});
lmbench = callPackage ./pkgs/lmbench/default.nix { };
# Broken and unmaintained
# mcxx = callPackage ./pkgs/mcxx/default.nix { };
mcxx = callPackage ./pkgs/mcxx/default.nix { };
meteocat-exporter = prev.callPackage ./pkgs/meteocat-exporter/default.nix { };
mpi = final.mpich; # Set MPICH as default
mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; };
@@ -62,14 +61,7 @@ let
tagaspi = callPackage ./pkgs/tagaspi/default.nix { };
tampi = callPackage ./pkgs/tampi/default.nix { };
upc-qaire-exporter = prev.callPackage ./pkgs/upc-qaire-exporter/default.nix { };
taopencl = callPackage ./pkgs/taopencl/default.nix { };
wxparaver = callPackage ./pkgs/paraver/default.nix { };
_cuda = prev._cuda.extend (_: _prev: final.lib.recursiveUpdate _prev {
extensions = _prev.extensions ++ [(finalAttrs: _: {
tacuda = finalAttrs.callPackage ./pkgs/tacuda/default.nix { };
})];
});
};
tests = rec {

View File

@@ -19,7 +19,7 @@ in stdenv.mkDerivation {
'';
hardeningDisable = [ "pic" "format" ];
nativeBuildInputs = kernel.moduleBuildDependencies;
patches = [ ./makefile.patch ./hrtimer.patch ./remove-wr-rdmsrq.patch ];
patches = [ ./makefile.patch ./hrtimer.patch ];
makeFlags = [
"KERNEL_VERSION=${kernel.modDirVersion}"
"KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"

View File

@@ -1,20 +0,0 @@
diff --git a/inc/PwrProfAsm.h b/inc/PwrProfAsm.h
index d77770a..c93a0e9 100644
--- a/inc/PwrProfAsm.h
+++ b/inc/PwrProfAsm.h
@@ -347,6 +347,7 @@
#endif
+/*
#define rdmsrq(msr,val1,val2,val3,val4) ({ \
__asm__ __volatile__( \
"rdmsr\n" \
@@ -362,6 +363,7 @@
:"c"(msr), "a"(val1), "d"(val2), "S"(val3), "D"(val4) \
); \
})
+*/
#define rdmsrpw(msr,val1,val2,val3,val4) ({ \
__asm__ __volatile__( \

View File

@@ -12,7 +12,7 @@ stdenv.mkDerivation (finalAttrs: {
src = ./.;
buildInputs = [
cudatoolkit # Required for nvcc
(lib.getOutput "static" cudaPackages.cuda_cudart) # Required for -lcudart_static
cudaPackages.cuda_cudart.static # Required for -lcudart_static
autoAddDriverRunpath
];
installPhase = ''

View File

@@ -9,6 +9,7 @@
, automake
, libtool
, mpi
, rsync
, gfortran
}:
@@ -43,24 +44,13 @@ stdenv.mkDerivation rec {
configureFlags = [
"--with-infiniband=${rdma-core-all}"
"--with-mpi=yes" # fixes mpi detection when cross-compiling
"--with-mpi=${mpiAll}"
"--with-slurm"
"CFLAGS=-fPIC"
"CXXFLAGS=-fPIC"
];
nativeBuildInputs = [
autoconf
automake
gfortran
libtool
];
buildInputs = [
slurm
mpiAll
rdma-core-all
];
buildInputs = [ slurm mpiAll rdma-core-all autoconf automake libtool rsync gfortran ];
hardeningDisable = [ "all" ];
@@ -70,6 +60,5 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false; # infiniband detection does not work
};
}

View File

@@ -10,7 +10,7 @@
, zlib
, autoPatchelfHook
, libfabric
, gcc
, gcc13
, wrapCCWith
}:
@@ -33,6 +33,8 @@ let
maintainers = with lib.maintainers.bsc; [ abonerib ];
};
gcc = gcc13;
v = {
hpckit = "2023.1.0";
compiler = "2023.1.0";

View File

@@ -27,10 +27,10 @@ let
# We need to replace the lld linker from bintools with our linker just built,
# otherwise we run into incompatibility issues when mixing compiler and linker
# versions.
bintools-unwrapped = llvmPackages_latest.bintools-unwrapped.override {
bintools-unwrapped = llvmPackages_latest.tools.bintools-unwrapped.override {
lld = clangOmpss2Unwrapped;
};
bintools = llvmPackages_latest.bintools.override {
bintools = llvmPackages_latest.tools.bintools.override {
bintools = bintools-unwrapped;
};
targetConfig = stdenv.targetPlatform.config;

View File

@@ -65,7 +65,6 @@ stdenv.mkDerivation rec {
];
meta = {
broken = true;
homepage = "https://github.com/bsc-pm/mcxx";
description = "C/C++/Fortran source-to-source compilation infrastructure aimed at fast prototyping";
maintainers = with lib.maintainers.bsc; [ rpenacob ];

View File

@@ -1,11 +1,9 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication {
python3Packages.buildPythonApplication rec {
pname = "meteocat-exporter";
version = "1.0";
pyproject = true;
src = ./.;
doCheck = false;

View File

@@ -6,13 +6,6 @@
, pmix
, gfortran
, symlinkJoin
# Disabled when cross-compiling
# To fix cross compilation, we should fill the values in:
# https://github.com/pmodels/mpich/blob/main/maint/fcrosscompile/cross_values.txt.in
# For each arch
, enableFortran ? stdenv.hostPlatform == stdenv.buildPlatform
, perl
, targetPackages
}:
let
@@ -22,13 +15,10 @@ let
paths = [ pmix.dev pmix.out ];
};
in mpich.overrideAttrs (old: {
buildInputs = old.buildInputs ++ [
buildInput = old.buildInputs ++ [
libfabric
pmixAll
];
nativeBuildInputs = old.nativeBuildInputs ++ [
perl
];
configureFlags = [
"--enable-shared"
"--enable-sharedlib"
@@ -41,21 +31,10 @@ in mpich.overrideAttrs (old: {
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
"FCFLAGS=-fallow-argument-mismatch"
] ++ lib.optionals (!enableFortran) [
"--disable-fortran"
];
preFixup = ''
sed -i 's:^CC=.*:CC=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}cc:' $out/bin/mpicc
sed -i 's:^CXX=.*:CXX=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}c++:' $out/bin/mpicxx
'' + lib.optionalString enableFortran ''
sed -i 's:^FC=.*:FC=${targetPackages.gfortran or gfortran}/bin/${targetPackages.gfortran.targetPrefix or gfortran.targetPrefix}gfortran:' $out/bin/mpifort
'';
hardeningDisable = [ "all" ];
meta = old.meta // {
maintainers = old.meta.maintainers ++ (with lib.maintainers.bsc; [ rarias ]);
cross = true;
};
})

View File

@@ -32,11 +32,6 @@ stdenv.mkDerivation rec {
"CXX=mpicxx"
];
env = {
MPICH_CC="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}cc";
MPICH_CXX="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}c++";
};
postInstall = ''
mkdir -p $out/bin
for f in $(find $out -executable -type f); do
@@ -49,6 +44,5 @@ stdenv.mkDerivation rec {
homepage = "http://mvapich.cse.ohio-state.edu/benchmarks/";
maintainers = [ ];
platforms = lib.platforms.all;
cross = true;
};
}

View File

@@ -12,7 +12,7 @@
, paraverKernel
, openssl
, glibcLocales
, wrapGAppsHook3
, wrapGAppsHook
}:
let
@@ -64,7 +64,7 @@ stdenv.mkDerivation rec {
autoconf
automake
autoreconfHook
wrapGAppsHook3
wrapGAppsHook
];
buildInputs = [

View File

@@ -35,6 +35,5 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.mit;
cross = true;
};
}

View File

@@ -1,72 +0,0 @@
# Package expression for tacuda, built with `backendStdenv`.
#
# Two sources are supported:
#   - the tagged release fetched from GitHub (default), or
#   - a pinned commit fetched with `builtins.fetchGit` when `useGit = true`,
#     configurable through `gitUrl`, `gitBranch` and `gitCommit`.
{
  backendStdenv,
  fetchFromGitHub,
  automake,
  autoconf,
  libtool,
  gnumake,
  autoreconfHook,
  boost,
  cudatoolkit,
  libcublas,
  cuda_cudart,

  useGit ? false,
  gitUrl ? "git@gitlab-internal.bsc.es:task-awareness/tacuda/tacuda.git",
  gitBranch ? "main",
  gitCommit ? "35234f9445e6149a2bd38d119841e2485d6ee05e",
}:

let
  # Tag of the release; it doubles as the package version.
  releaseTag = "2.1.0";

  # Source/version pair for the tagged GitHub release.
  fromRelease = {
    version = releaseTag;
    src = fetchFromGitHub {
      owner = "bsc-pm";
      repo = "tacuda";
      rev = releaseTag;
      hash = "sha256-Cj3EiLVJSLvRv0ydeg7Vp4SpkniEqHkcWF+YOJQ8EcM=";
    };
  };

  # Source/version pair for the pinned git commit. The version is the short
  # revision reported by the fetched checkout. Only evaluated when selected.
  fromGit =
    let
      checkout = builtins.fetchGit {
        url = gitUrl;
        ref = gitBranch;
        rev = gitCommit;
      };
    in
    {
      version = checkout.shortRev;
      src = checkout;
    };

  # Pick the source according to `useGit`.
  chosen = if useGit then fromGit else fromRelease;
in
backendStdenv.mkDerivation {
  pname = "tacuda";
  inherit (chosen) src version;

  strictDeps = true;
  separateDebugInfo = true;
  enableParallelBuilding = true;

  patches = [ ./fix_config.patch ];

  # Build-time tools (autotools toolchain).
  nativeBuildInputs = [
    autoreconfHook
    automake
    autoconf
    libtool
    gnumake
  ];

  # Libraries the package is built against.
  buildInputs = [
    boost
    libcublas
    cuda_cudart
  ];

  # The CUDA headers are taken from the cudatoolkit package.
  configureFlags = [ "--with-cuda-include=${cudatoolkit}/include" ];
}

View File

@@ -1,13 +0,0 @@
diff --git a/m4/cuda.m4 b/m4/cuda.m4
index 23f5c94..8f9b534 100644
--- a/m4/cuda.m4
+++ b/m4/cuda.m4
@@ -40,7 +40,7 @@ search_libs="cuda cublas cudart"
required_libs=""
m4_foreach([function],
- [cuInit,
+ [
cublasSgemm,
cudaStreamCreate,
cudaLaunchKernel,

View File

@@ -5,14 +5,23 @@
, automake
, autoconf
, libtool
, mpi
, autoreconfHook
, gpi-2
, boost
, numactl
, rdma-core
, gfortran
, symlinkJoin
}:
let
mpiAll = symlinkJoin {
name = "mpi-all";
paths = [ mpi.all ];
};
in
stdenv.mkDerivation rec {
pname = "tagaspi";
enableParallelBuilding = true;
@@ -26,18 +35,16 @@ stdenv.mkDerivation rec {
hash = "sha256-RGG/Re2uM293HduZfGzKUWioDtwnSYYdfeG9pVrX9EM=";
};
nativeBuildInputs = [
buildInputs = [
autoreconfHook
automake
autoconf
libtool
gfortran
];
buildInputs = [
boost
numactl
rdma-core
gfortran
mpiAll
];
dontDisableStatic = true;
@@ -56,6 +63,5 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false; # gpi-2 cannot cross
};
}

View File

@@ -68,6 +68,5 @@ in stdenv.mkDerivation {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = true;
};
}

View File

@@ -1,48 +0,0 @@
# Package expression for taopencl, built from a pinned commit fetched with
# `builtins.fetchGit`. The OpenCL library and header directories are passed
# to configure explicitly.
{
  stdenv,
  automake,
  autoconf,
  libtool,
  gnumake,
  boost,
  mpi,
  autoreconfHook,
  ocl-icd,
  opencl-headers,
}:

let
  # Pinned upstream checkout.
  pinnedSource = builtins.fetchGit {
    url = "git@gitlab-internal.bsc.es:task-awareness/taopencl/taopencl.git";
    ref = "master";
    rev = "c3b7b32ae8fa4af7ceff598532a881f8f1490aaf";
  };

  # Build-time tools (autotools toolchain).
  buildTools = [
    autoreconfHook
    automake
    autoconf
    libtool
    gnumake
  ];

  # Libraries the package is built against.
  linkedLibs = [
    boost
    mpi
  ];
in
stdenv.mkDerivation (self: {
  pname = "taopencl";
  src = pinnedSource;
  # Read the version from the final `src` attribute, so it follows any
  # override of `src`.
  version = self.src.shortRev;

  strictDeps = true;
  separateDebugInfo = true;
  enableParallelBuilding = true;

  nativeBuildInputs = buildTools;
  buildInputs = linkedLibs;

  configureFlags = [
    "--with-opencl-lib=${ocl-icd}/lib"
    "--with-opencl-include=${opencl-headers}/include"
  ];

  hardeningDisable = [ "all" ];
  dontDisableStatic = true;
})

View File

@@ -1,11 +1,9 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication {
python3Packages.buildPythonApplication rec {
pname = "upc-qaire-exporter";
version = "1.0";
pyproject = true;
src = ./.;
doCheck = false;