forked from rarias/jungle
Compare commits
5 Commits
master
...
00e7bafb6b
| Author | SHA1 | Date | |
|---|---|---|---|
|
00e7bafb6b
|
|||
|
d9f726b453
|
|||
|
b323615370
|
|||
|
4edbeddef7
|
|||
|
3028dff971
|
@@ -93,4 +93,20 @@
|
|||||||
wantedBy = [ "multi-user.target" ];
|
wantedBy = [ "multi-user.target" ];
|
||||||
serviceConfig.ExecStart = script;
|
serviceConfig.ExecStart = script;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Only allow SSH connections from users who have a SLURM allocation
|
||||||
|
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||||
|
security.pam.services.sshd.rules.account.slurm = {
|
||||||
|
control = "required";
|
||||||
|
enable = true;
|
||||||
|
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
||||||
|
args = [ "log_level=debug5" ];
|
||||||
|
order = 999999; # Make it last one
|
||||||
|
};
|
||||||
|
|
||||||
|
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
||||||
|
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
||||||
|
# into the slurmstepd task and then into the systemd session, which is not
|
||||||
|
# what we want, otherwise it will linger even if all jobs are gone.
|
||||||
|
security.pam.services.sshd.startSession = lib.mkForce false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{ lib, pkgs, ... }:
|
{ lib, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
@@ -21,20 +21,4 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
services.slurm.client.enable = true;
|
services.slurm.client.enable = true;
|
||||||
|
|
||||||
# Only allow SSH connections from users who have a SLURM allocation
|
|
||||||
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
|
||||||
security.pam.services.sshd.rules.account.slurm = {
|
|
||||||
control = "required";
|
|
||||||
enable = true;
|
|
||||||
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
|
||||||
args = [ "log_level=debug5" ];
|
|
||||||
order = 999999; # Make it last one
|
|
||||||
};
|
|
||||||
|
|
||||||
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
|
||||||
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
|
||||||
# into the slurmstepd task and then into the systemd session, which is not
|
|
||||||
# what we want, otherwise it will linger even if all jobs are gone.
|
|
||||||
security.pam.services.sshd.startSession = lib.mkForce false;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,31 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
{
|
let
|
||||||
|
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Shutting down host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
resumeProgram = pkgs.writeShellScript "resume.sh" ''
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Starting host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
in {
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
controlMachine = "apex";
|
controlMachine = "apex";
|
||||||
clusterName = "jungle";
|
clusterName = "jungle";
|
||||||
@@ -34,6 +59,16 @@
|
|||||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||||
TaskPlugin=task/affinity,task/cgroup
|
TaskPlugin=task/affinity,task/cgroup
|
||||||
|
|
||||||
|
# Power off unused nodes until they are requested
|
||||||
|
SuspendProgram=${suspendProgram}
|
||||||
|
SuspendTimeout=60
|
||||||
|
ResumeProgram=${resumeProgram}
|
||||||
|
ResumeTimeout=300
|
||||||
|
SuspendExcNodes=fox
|
||||||
|
|
||||||
|
# Turn the nodes off after 1 hour of inactivity
|
||||||
|
SuspendTime=3600
|
||||||
|
|
||||||
# Reduce port range so we can allow only this range in the firewall
|
# Reduce port range so we can allow only this range in the firewall
|
||||||
SrunPortRange=60000-61000
|
SrunPortRange=60000-61000
|
||||||
|
|
||||||
|
|||||||
14
overlay.nix
14
overlay.nix
@@ -101,16 +101,14 @@ let
|
|||||||
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;
|
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;
|
||||||
|
|
||||||
# Native build in that platform doesn't imply cross build works
|
# Native build in that platform doesn't imply cross build works
|
||||||
canCrossCompile = platform: default: pkg:
|
canCrossCompile = platform: pkg:
|
||||||
(isDerivation pkg) &&
|
(isDerivation pkg) &&
|
||||||
# If meta.cross is undefined, use default
|
# Must be defined explicitly
|
||||||
(pkg.meta.cross or default) &&
|
(pkg.meta.cross or false) &&
|
||||||
(meta.availableOn final.pkgsCross.${platform}.stdenv.hostPlatform pkg);
|
(meta.availableOn platform pkg);
|
||||||
|
|
||||||
# For now only RISC-V
|
# For now only RISC-V
|
||||||
crossSet = genAttrs [ "riscv64" ] (platform:
|
crossSet = { riscv64 = final.pkgsCross.riscv64.bsc.pkgsTopLevel; };
|
||||||
filterAttrs (_: canCrossCompile platform true)
|
|
||||||
final.pkgsCross.${platform}.bsc.pkgsTopLevel);
|
|
||||||
|
|
||||||
buildList = name: paths:
|
buildList = name: paths:
|
||||||
final.runCommandLocal name { } ''
|
final.runCommandLocal name { } ''
|
||||||
@@ -130,7 +128,7 @@ let
|
|||||||
# For now only RISC-V
|
# For now only RISC-V
|
||||||
crossList = buildList "ci-cross"
|
crossList = buildList "ci-cross"
|
||||||
(filter
|
(filter
|
||||||
(canCrossCompile "riscv64" false) # opt-in (pkgs with: meta.cross = true)
|
(canCrossCompile final.pkgsCross.riscv64.stdenv.hostPlatform)
|
||||||
(builtins.attrValues crossSet.riscv64));
|
(builtins.attrValues crossSet.riscv64));
|
||||||
|
|
||||||
in bscPkgs // {
|
in bscPkgs // {
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ in
|
|||||||
meta = {
|
meta = {
|
||||||
description = "Performance analysis tool-suite for x86 based applications";
|
description = "Performance analysis tool-suite for x86 based applications";
|
||||||
homepage = "https://www.amd.com/es/developer/uprof.html";
|
homepage = "https://www.amd.com/es/developer/uprof.html";
|
||||||
platforms = [ "x86_64-linux" ];
|
platforms = lib.platforms.linux;
|
||||||
license = lib.licenses.unfree;
|
license = lib.licenses.unfree;
|
||||||
maintainers = with lib.maintainers.bsc; [ rarias varcila ];
|
maintainers = with lib.maintainers.bsc; [ rarias varcila ];
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
{
|
{
|
||||||
stdenv
|
stdenv
|
||||||
, lib
|
|
||||||
, cudatoolkit
|
, cudatoolkit
|
||||||
, cudaPackages
|
, cudaPackages
|
||||||
, autoAddDriverRunpath
|
, autoAddDriverRunpath
|
||||||
@@ -41,9 +40,4 @@ stdenv.mkDerivation (finalAttrs: {
|
|||||||
'';
|
'';
|
||||||
installPhase = "touch $out";
|
installPhase = "touch $out";
|
||||||
};
|
};
|
||||||
|
|
||||||
meta = {
|
|
||||||
platforms = [ "x86_64-linux" ];
|
|
||||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
|
||||||
};
|
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
, automake
|
, automake
|
||||||
, libtool
|
, libtool
|
||||||
, mpi
|
, mpi
|
||||||
, rsync
|
|
||||||
, gfortran
|
, gfortran
|
||||||
}:
|
}:
|
||||||
|
|
||||||
@@ -44,13 +43,24 @@ stdenv.mkDerivation rec {
|
|||||||
|
|
||||||
configureFlags = [
|
configureFlags = [
|
||||||
"--with-infiniband=${rdma-core-all}"
|
"--with-infiniband=${rdma-core-all}"
|
||||||
"--with-mpi=${mpiAll}"
|
"--with-mpi=yes"
|
||||||
"--with-slurm"
|
"--with-slurm"
|
||||||
"CFLAGS=-fPIC"
|
"CFLAGS=-fPIC"
|
||||||
"CXXFLAGS=-fPIC"
|
"CXXFLAGS=-fPIC"
|
||||||
];
|
];
|
||||||
|
|
||||||
buildInputs = [ slurm mpiAll rdma-core-all autoconf automake libtool rsync gfortran ];
|
nativeBuildInputs = [
|
||||||
|
autoconf
|
||||||
|
automake
|
||||||
|
gfortran
|
||||||
|
libtool
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
slurm
|
||||||
|
mpiAll
|
||||||
|
rdma-core-all
|
||||||
|
];
|
||||||
|
|
||||||
hardeningDisable = [ "all" ];
|
hardeningDisable = [ "all" ];
|
||||||
|
|
||||||
@@ -60,5 +70,6 @@ stdenv.mkDerivation rec {
|
|||||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||||
platforms = lib.platforms.linux;
|
platforms = lib.platforms.linux;
|
||||||
license = lib.licenses.gpl3Plus;
|
license = lib.licenses.gpl3Plus;
|
||||||
|
cross = false;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,12 @@
|
|||||||
, pmix
|
, pmix
|
||||||
, gfortran
|
, gfortran
|
||||||
, symlinkJoin
|
, symlinkJoin
|
||||||
|
# Disabled when cross-compiling
|
||||||
|
# To fix cross compilation, we should fill the values in:
|
||||||
|
# https://github.com/pmodels/mpich/blob/main/maint/fcrosscompile/cross_values.txt.in
|
||||||
|
# For each arch
|
||||||
|
, enableFortran ? stdenv.hostPlatform == stdenv.buildPlatform
|
||||||
|
, perl
|
||||||
}:
|
}:
|
||||||
|
|
||||||
let
|
let
|
||||||
@@ -15,10 +21,13 @@ let
|
|||||||
paths = [ pmix.dev pmix.out ];
|
paths = [ pmix.dev pmix.out ];
|
||||||
};
|
};
|
||||||
in mpich.overrideAttrs (old: {
|
in mpich.overrideAttrs (old: {
|
||||||
buildInput = old.buildInputs ++ [
|
buildInputs = old.buildInputs ++ [
|
||||||
libfabric
|
libfabric
|
||||||
pmixAll
|
pmixAll
|
||||||
];
|
];
|
||||||
|
nativeBuildInputs = old.nativeBuildInputs ++ [
|
||||||
|
perl
|
||||||
|
];
|
||||||
configureFlags = [
|
configureFlags = [
|
||||||
"--enable-shared"
|
"--enable-shared"
|
||||||
"--enable-sharedlib"
|
"--enable-sharedlib"
|
||||||
@@ -31,10 +40,21 @@ in mpich.overrideAttrs (old: {
|
|||||||
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
|
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
|
||||||
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
|
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
|
||||||
"FCFLAGS=-fallow-argument-mismatch"
|
"FCFLAGS=-fallow-argument-mismatch"
|
||||||
|
] ++ lib.optionals (!enableFortran) [
|
||||||
|
"--disable-fortran"
|
||||||
];
|
];
|
||||||
|
|
||||||
|
preFixup = ''
|
||||||
|
sed -i "s:^CC=.*:CC=''${CC:-gcc}:" $out/bin/mpicc
|
||||||
|
sed -i "s:^CXX=.*:CXX=''${CXX:-g++}:" $out/bin/mpicxx
|
||||||
|
'' + lib.optionalString enableFortran ''
|
||||||
|
sed -i "s:^FC=.*:FC=''${FC:-gfortran}:" $out/bin/mpifort
|
||||||
|
'';
|
||||||
|
|
||||||
hardeningDisable = [ "all" ];
|
hardeningDisable = [ "all" ];
|
||||||
|
|
||||||
meta = old.meta // {
|
meta = old.meta // {
|
||||||
maintainers = old.meta.maintainers ++ (with lib.maintainers.bsc; [ rarias ]);
|
maintainers = old.meta.maintainers ++ (with lib.maintainers.bsc; [ rarias ]);
|
||||||
|
cross = true;
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -44,5 +44,6 @@ stdenv.mkDerivation rec {
|
|||||||
homepage = "http://mvapich.cse.ohio-state.edu/benchmarks/";
|
homepage = "http://mvapich.cse.ohio-state.edu/benchmarks/";
|
||||||
maintainers = [ ];
|
maintainers = [ ];
|
||||||
platforms = lib.platforms.all;
|
platforms = lib.platforms.all;
|
||||||
|
cross = true;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,5 +35,6 @@ stdenv.mkDerivation rec {
|
|||||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||||
platforms = lib.platforms.linux;
|
platforms = lib.platforms.linux;
|
||||||
license = lib.licenses.mit;
|
license = lib.licenses.mit;
|
||||||
|
cross = true;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,23 +5,14 @@
|
|||||||
, automake
|
, automake
|
||||||
, autoconf
|
, autoconf
|
||||||
, libtool
|
, libtool
|
||||||
, mpi
|
|
||||||
, autoreconfHook
|
, autoreconfHook
|
||||||
, gpi-2
|
, gpi-2
|
||||||
, boost
|
, boost
|
||||||
, numactl
|
, numactl
|
||||||
, rdma-core
|
, rdma-core
|
||||||
, gfortran
|
, gfortran
|
||||||
, symlinkJoin
|
|
||||||
}:
|
}:
|
||||||
|
|
||||||
let
|
|
||||||
mpiAll = symlinkJoin {
|
|
||||||
name = "mpi-all";
|
|
||||||
paths = [ mpi.all ];
|
|
||||||
};
|
|
||||||
in
|
|
||||||
|
|
||||||
stdenv.mkDerivation rec {
|
stdenv.mkDerivation rec {
|
||||||
pname = "tagaspi";
|
pname = "tagaspi";
|
||||||
enableParallelBuilding = true;
|
enableParallelBuilding = true;
|
||||||
@@ -35,16 +26,18 @@ stdenv.mkDerivation rec {
|
|||||||
hash = "sha256-RGG/Re2uM293HduZfGzKUWioDtwnSYYdfeG9pVrX9EM=";
|
hash = "sha256-RGG/Re2uM293HduZfGzKUWioDtwnSYYdfeG9pVrX9EM=";
|
||||||
};
|
};
|
||||||
|
|
||||||
buildInputs = [
|
nativeBuildInputs = [
|
||||||
autoreconfHook
|
autoreconfHook
|
||||||
automake
|
automake
|
||||||
autoconf
|
autoconf
|
||||||
libtool
|
libtool
|
||||||
|
gfortran
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
boost
|
boost
|
||||||
numactl
|
numactl
|
||||||
rdma-core
|
rdma-core
|
||||||
gfortran
|
|
||||||
mpiAll
|
|
||||||
];
|
];
|
||||||
|
|
||||||
dontDisableStatic = true;
|
dontDisableStatic = true;
|
||||||
@@ -63,5 +56,6 @@ stdenv.mkDerivation rec {
|
|||||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||||
platforms = lib.platforms.linux;
|
platforms = lib.platforms.linux;
|
||||||
license = lib.licenses.gpl3Plus;
|
license = lib.licenses.gpl3Plus;
|
||||||
|
cross = false; # gpi-2 cannot cross
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,5 +68,6 @@ in stdenv.mkDerivation {
|
|||||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||||
platforms = lib.platforms.linux;
|
platforms = lib.platforms.linux;
|
||||||
license = lib.licenses.gpl3Plus;
|
license = lib.licenses.gpl3Plus;
|
||||||
|
cross = true;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user