Compare commits

...

18 Commits

Author SHA1 Message Date
a173af654f
Fix osu cross-compilation
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
2fff7e4a7b
Set mpich default compilers from targetPackages
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
a761b73336
Enable meta.cross for mpich related packages
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
86eb796771
Disable meta.cross for gpi-2 and tagaspi
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
08633435cf
Fix nativeBuildInputs for tagaspi
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
39d64456a4
Fix nativeBuildInputs for gpi-2
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
410040a4a0
Fix mpich cross compilation (disable fortran)
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 16:23:46 +01:00
fc69ef3217 Enable pam_slurm_adopt in all compute nodes
Prevents access to owl1 and owl2 too if the user doesn't have any jobs
running there.

Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-31 11:41:50 +01:00
1d025f7a38 Don't suspend owl compute nodes
Currently the owl nodes are located on top of the rack and turning them
off causes a high temperature increase at that region, which accumulates
heat from the whole rack. To maximize airflow we will leave them on at
all times. This also makes allocations immediate at the extra cost of
around 200 W.

In the future, if we include more nodes in SLURM we can configure those
to turn off if needed.

Fixes: rarias/jungle#156
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-31 11:41:44 +01:00
7989779c8f Filter out packages by platform from crossSet
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
Tested-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 11:21:13 +01:00
7d721084a7 Add meta to cudainfo
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 11:20:01 +01:00
796d34a549 Set amd-uprof platforms to x86_64-linux only
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-31 11:19:57 +01:00
5ff1b1343b Add nixgen to all machines
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-29 16:28:05 +01:00
c5cc13fad8 Add nixgen package
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-29 16:27:56 +01:00
2e09314a7e Update OmpSs-2 LLVM to 2025.11
Reviewed-by: Aleix Boné <abonerib@bsc.es>
Tested-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-10-29 16:22:57 +01:00
217d9c1fc0 Update NODES to 1.4
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-29 16:21:46 +01:00
f47ab7757e Update nOS-V to 4.0.0
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-29 16:21:43 +01:00
4b265c071e Update ovni to 1.13.0
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-29 16:21:32 +01:00
19 changed files with 218 additions and 91 deletions

View File

@ -5,8 +5,8 @@
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv
# From bsckgs overlay
osumb
# From jungle overlay
osumb nixgen
];
programs.direnv.enable = true;

View File

@ -93,20 +93,4 @@
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@ -1,4 +1,4 @@
{ lib, ... }:
{ lib, pkgs, ... }:
{
imports = [
@ -21,4 +21,20 @@
};
services.slurm.client.enable = true;
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@ -1,31 +1,6 @@
{ config, pkgs, ... }:
let
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeShellScript "resume.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
{
services.slurm = {
controlMachine = "apex";
clusterName = "jungle";
@ -59,16 +34,6 @@ in {
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000

View File

@ -37,6 +37,7 @@ let
nanos6 = callPackage ./pkgs/nanos6/default.nix { };
nanos6Debug = final.nanos6.override { enableDebug = true; };
nixtools = callPackage ./pkgs/nixtools/default.nix { };
nixgen = callPackage ./pkgs/nixgen/default.nix { };
# Broken because of pkgsStatic.libcap
# See: https://github.com/NixOS/nixpkgs/pull/268791
#nix-wrap = callPackage ./pkgs/nix-wrap/default.nix { };
@ -100,14 +101,16 @@ let
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;
# Native build in that platform doesn't imply cross build works
canCrossCompile = platform: pkg:
canCrossCompile = platform: default: pkg:
(isDerivation pkg) &&
# Must be defined explicitly
(pkg.meta.cross or false) &&
(meta.availableOn platform pkg);
# If meta.cross is undefined, use default
(pkg.meta.cross or default) &&
(meta.availableOn final.pkgsCross.${platform}.stdenv.hostPlatform pkg);
# For now only RISC-V
crossSet = { riscv64 = final.pkgsCross.riscv64.bsc.pkgsTopLevel; };
crossSet = genAttrs [ "riscv64" ] (platform:
filterAttrs (_: canCrossCompile platform true)
final.pkgsCross.${platform}.bsc.pkgsTopLevel);
buildList = name: paths:
final.runCommandLocal name { } ''
@ -127,7 +130,7 @@ let
# For now only RISC-V
crossList = buildList "ci-cross"
(filter
(canCrossCompile final.pkgsCross.riscv64.stdenv.hostPlatform)
(canCrossCompile "riscv64" false) # opt-in (pkgs with: meta.cross = true)
(builtins.attrValues crossSet.riscv64));
in bscPkgs // {

View File

@ -90,7 +90,7 @@ in
meta = {
description = "Performance analysis tool-suite for x86 based applications";
homepage = "https://www.amd.com/es/developer/uprof.html";
platforms = lib.platforms.linux;
platforms = [ "x86_64-linux" ];
license = lib.licenses.unfree;
maintainers = with lib.maintainers.bsc; [ rarias varcila ];
};

View File

@ -1,5 +1,6 @@
{
stdenv
, lib
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
@ -40,4 +41,9 @@ stdenv.mkDerivation (finalAttrs: {
'';
installPhase = "touch $out";
};
meta = {
platforms = [ "x86_64-linux" ];
maintainers = with lib.maintainers.bsc; [ rarias ];
};
})

View File

@ -9,7 +9,6 @@
, automake
, libtool
, mpi
, rsync
, gfortran
}:
@ -44,13 +43,24 @@ stdenv.mkDerivation rec {
configureFlags = [
"--with-infiniband=${rdma-core-all}"
"--with-mpi=${mpiAll}"
"--with-mpi=yes" # fixes mpi detection when cross-compiling
"--with-slurm"
"CFLAGS=-fPIC"
"CXXFLAGS=-fPIC"
];
buildInputs = [ slurm mpiAll rdma-core-all autoconf automake libtool rsync gfortran ];
nativeBuildInputs = [
autoconf
automake
gfortran
libtool
];
buildInputs = [
slurm
mpiAll
rdma-core-all
];
hardeningDisable = [ "all" ];
@ -60,5 +70,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false; # infiniband detection does not work
};
}

View File

@ -16,19 +16,19 @@
, useGit ? false
, gitUrl ? "ssh://git@bscpm04.bsc.es/llvm-ompss/llvm-mono.git"
, gitBranch ? "master"
, gitCommit ? "880e2341c56bad1dc14e8c369fb3356bec19018e"
, gitCommit ? "872ba63f86edaefc9787984ef3fae9f2f94e0124" # github-release-2025.11
}:
let
stdenv = llvmPackages_latest.stdenv;
release = rec {
version = "2025.06";
version = "2025.11";
src = fetchFromGitHub {
owner = "bsc-pm";
repo = "llvm";
rev = "refs/tags/github-release-${version}";
hash = "sha256-ww9PpRmtz/M9IyLiZ8rAehx2UW4VpQt+svf4XfKBzKo=";
hash = "sha256-UgwMTUkM9Z87dDH205swZFBeFhrcbLAxginViG40pBM=";
};
};

View File

@ -6,6 +6,13 @@
, pmix
, gfortran
, symlinkJoin
# Disabled when cross-compiling
# To fix cross compilation, we should fill the values in:
# https://github.com/pmodels/mpich/blob/main/maint/fcrosscompile/cross_values.txt.in
# For each arch
, enableFortran ? stdenv.hostPlatform == stdenv.buildPlatform
, perl
, targetPackages
}:
let
@ -15,10 +22,13 @@ let
paths = [ pmix.dev pmix.out ];
};
in mpich.overrideAttrs (old: {
buildInput = old.buildInputs ++ [
buildInputs = old.buildInputs ++ [
libfabric
pmixAll
];
nativeBuildInputs = old.nativeBuildInputs ++ [
perl
];
configureFlags = [
"--enable-shared"
"--enable-sharedlib"
@ -31,10 +41,21 @@ in mpich.overrideAttrs (old: {
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
"FCFLAGS=-fallow-argument-mismatch"
] ++ lib.optionals (!enableFortran) [
"--disable-fortran"
];
preFixup = ''
sed -i 's:^CC=.*:CC=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}cc:' $out/bin/mpicc
sed -i 's:^CXX=.*:CXX=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}c++:' $out/bin/mpicxx
'' + lib.optionalString enableFortran ''
sed -i 's:^FC=.*:FC=${targetPackages.gfortran or gfortran}/bin/${targetPackages.gfortran.targetPrefix or gfortran.targetPrefix}gfortran:' $out/bin/mpifort
'';
hardeningDisable = [ "all" ];
meta = old.meta // {
maintainers = old.meta.maintainers ++ (with lib.maintainers.bsc; [ rarias ]);
cross = true;
};
})

22
pkgs/nixgen/default.nix Normal file
View File

@ -0,0 +1,22 @@
{
stdenv
, lib
}:
stdenv.mkDerivation {
pname = "nixgen";
version = "0.0.1";
src = ./nixgen;
dontUnpack = true;
phases = [ "installPhase" ];
installPhase = ''
mkdir -p $out/bin
cp -a $src $out/bin/nixgen
'';
meta = {
description = "Quickly generate flake.nix from command line";
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
};
}

97
pkgs/nixgen/nixgen Executable file
View File

@ -0,0 +1,97 @@
#!/bin/sh
#
# Copyright (c) 2025, Barcelona Supercomputing Center (BSC)
# SPDX-License-Identifier: GPL-3.0+
# Author: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
function usage() {
echo "USAGE: nixgen [-f] [package [...]] [-b package [...]]" >&2
echo " Generates a flake.nix file with the given packages." >&2
echo " After flake.nix is created, use 'nix develop' to enter the shell." >&2
echo "OPTIONS" >&2
echo " -f Overwrite existing flake.nix (default: no)." >&2
echo " packages... Add these packages to the shell." >&2
echo " -b packages... Add the dependencies needed to build these packages." >&2
echo "EXAMPLE" >&2
echo " $ nixgen ovni bigotes -b nosv tampi" >&2
echo " Adds the packages ovni and bigotes as well as all required dependencies" >&2
echo " to build nosv and tampi." >&2
echo "AUTHOR" >&2
echo " Rodrigo Arias Mallo <rodrigo.arias@bsc.es>" >&2
exit 1
}
mode=package
packages=
inputsFrom=
force=
if [[ $# -eq 0 ]]; then
usage
fi
while [[ $# -gt 0 ]]; do
case $1 in -b)
mode=build
shift
;;
-f)
force=1
shift
;;
-h)
usage
;;
-*|--*)
echo "error: unknown option $1" >&2
exit 1
;;
*)
if [ "$mode" == "package" ]; then
packages+="${packages:+ }$1"
else
inputsFrom+="${inputsFrom:+ }$1"
fi
shift
;;
esac
done
if [ ! "$force" -a -e flake.nix ]; then
echo "error: flake.nix exists, force overwrite with -f" >&2
exit 1
fi
cat > flake.nix <<EOF
{
inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
outputs = { self, jungle }:
let
nixpkgs = jungle.inputs.nixpkgs;
customOverlay = (final: prev: {
# Example overlay, for now empty
});
pkgs = import nixpkgs {
system = "x86_64-linux";
overlays = [
# Apply jungle overlay to get our BSC custom packages
jungle.outputs.bscOverlay
# And on top apply our local changes to customize for cluster
customOverlay
];
};
in {
devShells.x86_64-linux.default = pkgs.mkShell {
pname = "devshell";
# Include these packages in the shell
packages = with pkgs; [
$packages
];
# The dependencies needed to build these packages will be also included
inputsFrom = with pkgs; [
$inputsFrom
];
};
};
}
EOF

View File

@ -14,19 +14,19 @@
, useGit ? false
, gitUrl ? "ssh://git@gitlab-internal.bsc.es/nos-v/nodes.git"
, gitBranch ? "master"
, gitCommit ? "6002ec9ae6eb876d962cc34366952a3b26599ba6"
, gitCommit ? "511489e71504a44381e0930562e7ac80ac69a848" # version-1.4
}:
with lib;
let
release = rec {
version = "1.3";
version = "1.4";
src = fetchFromGitHub {
owner = "bsc-pm";
repo = "nodes";
rev = "version-${version}";
hash = "sha256-cFb9pxcjtkMmH0CsGgUO9LTdXDNh7MCqicgGWawLrsU=";
hash = "sha256-+lR/R0l3fGZO3XG7whMorFW2y2YZ0ZFnLeOHyQYrAsQ=";
};
};

View File

@ -13,19 +13,19 @@
, useGit ? false
, gitUrl ? "git@gitlab-internal.bsc.es:nos-v/nos-v.git"
, gitBranch ? "master"
, gitCommit ? "9f47063873c3aa9d6a47482a82c5000a8c813dd8"
, gitCommit ? "1108e4786b58e0feb9a16fa093010b763eb2f8e8" # version 4.0.0
}:
with lib;
let
release = rec {
version = "3.2.0";
version = "4.0.0";
src = fetchFromGitHub {
owner = "bsc-pm";
repo = "nos-v";
rev = "${version}";
hash = "sha256-yaz92426EM8trdkBJlISmAoG9KJCDTvoAW/HKrasvOw=";
hash = "sha256-llaq73bd/YxLVKNlMebnUHKa4z3sdcsuDUoVwUxNuw8=";
};
};

View File

@ -32,6 +32,11 @@ stdenv.mkDerivation rec {
"CXX=mpicxx"
];
env = {
MPICH_CC="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}cc";
MPICH_CXX="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}c++";
};
postInstall = ''
mkdir -p $out/bin
for f in $(find $out -executable -type f); do
@ -44,5 +49,6 @@ stdenv.mkDerivation rec {
homepage = "http://mvapich.cse.ohio-state.edu/benchmarks/";
maintainers = [ ];
platforms = lib.platforms.all;
cross = true;
};
}

View File

@ -7,7 +7,7 @@
, useGit ? false
, gitBranch ? "master"
, gitUrl ? "ssh://git@bscpm04.bsc.es/rarias/ovni.git"
, gitCommit ? "e4f62382076f0cf0b1d08175cf57cc0bc51abc61"
, gitCommit ? "06432668f346c8bdc1006fabc23e94ccb81b0d8b" # version 1.13.0
, enableDebug ? false
# Only enable MPI if the build is native (fails on cross-compilation)
, useMpi ? (stdenv.buildPlatform.canExecute stdenv.hostPlatform)
@ -15,13 +15,13 @@
let
release = rec {
version = "1.12.0";
version = "1.13.0";
src = fetchFromGitHub {
owner = "bsc-pm";
repo = "ovni";
rev = "${version}";
hash = "sha256-H04JvsVKrdqr3ON7JhU0g17jjlg/jzQ7eTfx9vUNd3E=";
} // { shortRev = "a73afcf"; };
hash = "sha256-0l2ryIyWNiZqeYdVlnj/WnQGS3xFCY4ICG8JedX424w=";
} // { shortRev = "0643266"; };
};
git = rec {

View File

@ -35,5 +35,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.mit;
cross = true;
};
}

View File

@ -5,23 +5,14 @@
, automake
, autoconf
, libtool
, mpi
, autoreconfHook
, gpi-2
, boost
, numactl
, rdma-core
, gfortran
, symlinkJoin
}:
let
mpiAll = symlinkJoin {
name = "mpi-all";
paths = [ mpi.all ];
};
in
stdenv.mkDerivation rec {
pname = "tagaspi";
enableParallelBuilding = true;
@ -35,16 +26,18 @@ stdenv.mkDerivation rec {
hash = "sha256-RGG/Re2uM293HduZfGzKUWioDtwnSYYdfeG9pVrX9EM=";
};
buildInputs = [
nativeBuildInputs = [
autoreconfHook
automake
autoconf
libtool
gfortran
];
buildInputs = [
boost
numactl
rdma-core
gfortran
mpiAll
];
dontDisableStatic = true;
@ -63,5 +56,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false; # gpi-2 cannot cross
};
}

View File

@ -68,5 +68,6 @@ in stdenv.mkDerivation {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = true;
};
}