Compare commits

..

5 Commits

12 changed files with 104 additions and 49 deletions

View File

@@ -93,4 +93,20 @@
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@@ -1,4 +1,4 @@
{ lib, pkgs, ... }:
{ lib, ... }:
{
imports = [
@@ -21,20 +21,4 @@
};
services.slurm.client.enable = true;
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@@ -1,6 +1,31 @@
{ config, pkgs, ... }:
{
let
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeShellScript "resume.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
services.slurm = {
controlMachine = "apex";
clusterName = "jungle";
@@ -34,6 +59,16 @@
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000

View File

@@ -101,16 +101,14 @@ let
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;
# Native build in that platform doesn't imply cross build works
canCrossCompile = platform: default: pkg:
canCrossCompile = platform: pkg:
(isDerivation pkg) &&
# If meta.cross is undefined, use default
(pkg.meta.cross or default) &&
(meta.availableOn final.pkgsCross.${platform}.stdenv.hostPlatform pkg);
# Must be defined explicitly
(pkg.meta.cross or false) &&
(meta.availableOn platform pkg);
# For now only RISC-V
crossSet = genAttrs [ "riscv64" ] (platform:
filterAttrs (_: canCrossCompile platform true)
final.pkgsCross.${platform}.bsc.pkgsTopLevel);
crossSet = { riscv64 = final.pkgsCross.riscv64.bsc.pkgsTopLevel; };
buildList = name: paths:
final.runCommandLocal name { } ''
@@ -130,7 +128,7 @@ let
# For now only RISC-V
crossList = buildList "ci-cross"
(filter
(canCrossCompile "riscv64" false) # opt-in (pkgs with: meta.cross = true)
(canCrossCompile final.pkgsCross.riscv64.stdenv.hostPlatform)
(builtins.attrValues crossSet.riscv64));
in bscPkgs // {

View File

@@ -90,7 +90,7 @@ in
meta = {
description = "Performance analysis tool-suite for x86 based applications";
homepage = "https://www.amd.com/es/developer/uprof.html";
platforms = [ "x86_64-linux" ];
platforms = lib.platforms.linux;
license = lib.licenses.unfree;
maintainers = with lib.maintainers.bsc; [ rarias varcila ];
};

View File

@@ -1,6 +1,5 @@
{
stdenv
, lib
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
@@ -41,9 +40,4 @@ stdenv.mkDerivation (finalAttrs: {
'';
installPhase = "touch $out";
};
meta = {
platforms = [ "x86_64-linux" ];
maintainers = with lib.maintainers.bsc; [ rarias ];
};
})

View File

@@ -9,7 +9,6 @@
, automake
, libtool
, mpi
, rsync
, gfortran
}:
@@ -44,13 +43,24 @@ stdenv.mkDerivation rec {
configureFlags = [
"--with-infiniband=${rdma-core-all}"
"--with-mpi=${mpiAll}"
"--with-mpi=yes"
"--with-slurm"
"CFLAGS=-fPIC"
"CXXFLAGS=-fPIC"
];
buildInputs = [ slurm mpiAll rdma-core-all autoconf automake libtool rsync gfortran ];
nativeBuildInputs = [
autoconf
automake
gfortran
libtool
];
buildInputs = [
slurm
mpiAll
rdma-core-all
];
hardeningDisable = [ "all" ];
@@ -60,5 +70,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false;
};
}

View File

@@ -6,6 +6,12 @@
, pmix
, gfortran
, symlinkJoin
# Disabled when cross-compiling
# To fix cross compilation, we should fill the values in:
# https://github.com/pmodels/mpich/blob/main/maint/fcrosscompile/cross_values.txt.in
# For each arch
, enableFortran ? stdenv.hostPlatform == stdenv.buildPlatform
, perl
}:
let
@@ -15,10 +21,13 @@ let
paths = [ pmix.dev pmix.out ];
};
in mpich.overrideAttrs (old: {
buildInput = old.buildInputs ++ [
buildInputs = old.buildInputs ++ [
libfabric
pmixAll
];
nativeBuildInputs = old.nativeBuildInputs ++ [
perl
];
configureFlags = [
"--enable-shared"
"--enable-sharedlib"
@@ -31,10 +40,21 @@ in mpich.overrideAttrs (old: {
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
"FCFLAGS=-fallow-argument-mismatch"
] ++ lib.optionals (!enableFortran) [
"--disable-fortran"
];
preFixup = ''
sed -i "s:^CC=.*:CC=''${CC:-gcc}:" $out/bin/mpicc
sed -i "s:^CXX=.*:CXX=''${CXX:-g++}:" $out/bin/mpicxx
'' + lib.optionalString enableFortran ''
sed -i "s:^FC=.*:FC=''${FC:-gfortran}:" $out/bin/mpifort
'';
hardeningDisable = [ "all" ];
meta = old.meta // {
maintainers = old.meta.maintainers ++ (with lib.maintainers.bsc; [ rarias ]);
cross = true;
};
})

View File

@@ -44,5 +44,6 @@ stdenv.mkDerivation rec {
homepage = "http://mvapich.cse.ohio-state.edu/benchmarks/";
maintainers = [ ];
platforms = lib.platforms.all;
cross = true;
};
}

View File

@@ -35,5 +35,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.mit;
cross = true;
};
}

View File

@@ -5,23 +5,14 @@
, automake
, autoconf
, libtool
, mpi
, autoreconfHook
, gpi-2
, boost
, numactl
, rdma-core
, gfortran
, symlinkJoin
}:
let
mpiAll = symlinkJoin {
name = "mpi-all";
paths = [ mpi.all ];
};
in
stdenv.mkDerivation rec {
pname = "tagaspi";
enableParallelBuilding = true;
@@ -35,16 +26,18 @@ stdenv.mkDerivation rec {
hash = "sha256-RGG/Re2uM293HduZfGzKUWioDtwnSYYdfeG9pVrX9EM=";
};
buildInputs = [
nativeBuildInputs = [
autoreconfHook
automake
autoconf
libtool
gfortran
];
buildInputs = [
boost
numactl
rdma-core
gfortran
mpiAll
];
dontDisableStatic = true;
@@ -63,5 +56,6 @@ stdenv.mkDerivation rec {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = false; # gpi-2 cannot cross
};
}

View File

@@ -68,5 +68,6 @@ in stdenv.mkDerivation {
maintainers = with lib.maintainers.bsc; [ rarias ];
platforms = lib.platforms.linux;
license = lib.licenses.gpl3Plus;
cross = true;
};
}