forked from rarias/jungle
Compare commits
5 Commits
upgrade/25
...
81fa5392bc
| Author | SHA1 | Date | |
|---|---|---|---|
|
81fa5392bc
|
|||
|
7e2ba7049d
|
|||
|
7c1a32dbec
|
|||
|
437f7947e1
|
|||
|
053ee5b0a5
|
8
flake.lock
generated
8
flake.lock
generated
@@ -2,16 +2,16 @@
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1764522689,
|
||||
"narHash": "sha256-SqUuBFjhl/kpDiVaKLQBoD8TLD+/cTUzzgVFoaHrkqY=",
|
||||
"lastModified": 1752436162,
|
||||
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "8bb5646e0bed5dbd3ab08c7a7cc15b75ab4e1d0f",
|
||||
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-25.11",
|
||||
"ref": "nixos-25.05",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, ... }:
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
{ pkgs, ... }:
|
||||
{ pkgs, config, ... }:
|
||||
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
|
||||
ncdu perf ldns pv
|
||||
ncdu config.boot.kernelPackages.perf ldns pv
|
||||
# From jungle overlay
|
||||
osumb nixgen
|
||||
];
|
||||
|
||||
@@ -5,5 +5,5 @@
|
||||
boot.kernelModules = [ "ipmi_watchdog" ];
|
||||
|
||||
# Enable systemd watchdog with 30 s interval
|
||||
systemd.settings.Manager.RuntimeWatchdogSec = 30;
|
||||
systemd.watchdog.runtimeTime = "30s";
|
||||
}
|
||||
|
||||
@@ -93,4 +93,20 @@
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
|
||||
# Only allow SSH connections from users who have a SLURM allocation
|
||||
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
security.pam.services.sshd.rules.account.slurm = {
|
||||
control = "required";
|
||||
enable = true;
|
||||
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
||||
args = [ "log_level=debug5" ];
|
||||
order = 999999; # Make it last one
|
||||
};
|
||||
|
||||
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
||||
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
||||
# into the slurmstepd task and then into the systemd session, which is not
|
||||
# what we want, otherwise it will linger even if all jobs are gone.
|
||||
security.pam.services.sshd.startSession = lib.mkForce false;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,3 @@
|
||||
{
|
||||
services.nixseparatedebuginfod2 = {
|
||||
enable = true;
|
||||
substituters = [
|
||||
"local:"
|
||||
"https://cache.nixos.org"
|
||||
"http://hut/cache"
|
||||
];
|
||||
};
|
||||
services.nixseparatedebuginfod.enable = true;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{ lib, pkgs, ... }:
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
@@ -21,20 +21,4 @@
|
||||
};
|
||||
|
||||
services.slurm.client.enable = true;
|
||||
|
||||
# Only allow SSH connections from users who have a SLURM allocation
|
||||
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
security.pam.services.sshd.rules.account.slurm = {
|
||||
control = "required";
|
||||
enable = true;
|
||||
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
||||
args = [ "log_level=debug5" ];
|
||||
order = 999999; # Make it last one
|
||||
};
|
||||
|
||||
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
||||
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
||||
# into the slurmstepd task and then into the systemd session, which is not
|
||||
# what we want, otherwise it will linger even if all jobs are gone.
|
||||
security.pam.services.sshd.startSession = lib.mkForce false;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,31 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
let
|
||||
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Shutting down host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||
done
|
||||
'';
|
||||
|
||||
resumeProgram = pkgs.writeShellScript "resume.sh" ''
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Starting host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||
done
|
||||
'';
|
||||
|
||||
in {
|
||||
services.slurm = {
|
||||
controlMachine = "apex";
|
||||
clusterName = "jungle";
|
||||
@@ -34,6 +59,16 @@
|
||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||
TaskPlugin=task/affinity,task/cgroup
|
||||
|
||||
# Power off unused nodes until they are requested
|
||||
SuspendProgram=${suspendProgram}
|
||||
SuspendTimeout=60
|
||||
ResumeProgram=${resumeProgram}
|
||||
ResumeTimeout=300
|
||||
SuspendExcNodes=fox
|
||||
|
||||
# Turn the nodes off after 1 hour of inactivity
|
||||
SuspendTime=3600
|
||||
|
||||
# Reduce port range so we can allow only this range in the firewall
|
||||
SrunPortRange=60000-61000
|
||||
|
||||
|
||||
17
overlay.nix
17
overlay.nix
@@ -30,8 +30,7 @@ let
|
||||
amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { };
|
||||
});
|
||||
lmbench = callPackage ./pkgs/lmbench/default.nix { };
|
||||
# Broken and unmantained
|
||||
# mcxx = callPackage ./pkgs/mcxx/default.nix { };
|
||||
mcxx = callPackage ./pkgs/mcxx/default.nix { };
|
||||
meteocat-exporter = prev.callPackage ./pkgs/meteocat-exporter/default.nix { };
|
||||
mpi = final.mpich; # Set MPICH as default
|
||||
mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; };
|
||||
@@ -102,16 +101,14 @@ let
|
||||
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;
|
||||
|
||||
# Native build in that platform doesn't imply cross build works
|
||||
canCrossCompile = platform: default: pkg:
|
||||
canCrossCompile = platform: pkg:
|
||||
(isDerivation pkg) &&
|
||||
# If meta.cross is undefined, use default
|
||||
(pkg.meta.cross or default) &&
|
||||
(meta.availableOn final.pkgsCross.${platform}.stdenv.hostPlatform pkg);
|
||||
# Must be defined explicitly
|
||||
(pkg.meta.cross or false) &&
|
||||
(meta.availableOn platform pkg);
|
||||
|
||||
# For now only RISC-V
|
||||
crossSet = genAttrs [ "riscv64" ] (platform:
|
||||
filterAttrs (_: canCrossCompile platform true)
|
||||
final.pkgsCross.${platform}.bsc.pkgsTopLevel);
|
||||
crossSet = { riscv64 = final.pkgsCross.riscv64.bsc.pkgsTopLevel; };
|
||||
|
||||
buildList = name: paths:
|
||||
final.runCommandLocal name { } ''
|
||||
@@ -131,7 +128,7 @@ let
|
||||
# For now only RISC-V
|
||||
crossList = buildList "ci-cross"
|
||||
(filter
|
||||
(canCrossCompile "riscv64" false) # opt-in (pkgs with: meta.cross = true)
|
||||
(canCrossCompile final.pkgsCross.riscv64.stdenv.hostPlatform)
|
||||
(builtins.attrValues crossSet.riscv64));
|
||||
|
||||
in bscPkgs // {
|
||||
|
||||
@@ -90,7 +90,7 @@ in
|
||||
meta = {
|
||||
description = "Performance analysis tool-suite for x86 based applications";
|
||||
homepage = "https://www.amd.com/es/developer/uprof.html";
|
||||
platforms = [ "x86_64-linux" ];
|
||||
platforms = lib.platforms.linux;
|
||||
license = lib.licenses.unfree;
|
||||
maintainers = with lib.maintainers.bsc; [ rarias varcila ];
|
||||
};
|
||||
|
||||
@@ -19,7 +19,7 @@ in stdenv.mkDerivation {
|
||||
'';
|
||||
hardeningDisable = [ "pic" "format" ];
|
||||
nativeBuildInputs = kernel.moduleBuildDependencies;
|
||||
patches = [ ./makefile.patch ./hrtimer.patch ./remove-wr-rdmsrq.patch ];
|
||||
patches = [ ./makefile.patch ./hrtimer.patch ];
|
||||
makeFlags = [
|
||||
"KERNEL_VERSION=${kernel.modDirVersion}"
|
||||
"KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
diff --git a/inc/PwrProfAsm.h b/inc/PwrProfAsm.h
|
||||
index d77770a..c93a0e9 100644
|
||||
--- a/inc/PwrProfAsm.h
|
||||
+++ b/inc/PwrProfAsm.h
|
||||
@@ -347,6 +347,7 @@
|
||||
|
||||
#endif
|
||||
|
||||
+/*
|
||||
#define rdmsrq(msr,val1,val2,val3,val4) ({ \
|
||||
__asm__ __volatile__( \
|
||||
"rdmsr\n" \
|
||||
@@ -362,6 +363,7 @@
|
||||
:"c"(msr), "a"(val1), "d"(val2), "S"(val3), "D"(val4) \
|
||||
); \
|
||||
})
|
||||
+*/
|
||||
|
||||
#define rdmsrpw(msr,val1,val2,val3,val4) ({ \
|
||||
__asm__ __volatile__( \
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
stdenv
|
||||
, lib
|
||||
, cudatoolkit
|
||||
, cudaPackages
|
||||
, autoAddDriverRunpath
|
||||
@@ -12,7 +11,7 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
src = ./.;
|
||||
buildInputs = [
|
||||
cudatoolkit # Required for nvcc
|
||||
(lib.getOutput "static" cudaPackages.cuda_cudart) # Required for -lcudart_static
|
||||
cudaPackages.cuda_cudart.static # Required for -lcudart_static
|
||||
autoAddDriverRunpath
|
||||
];
|
||||
installPhase = ''
|
||||
@@ -41,9 +40,4 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
'';
|
||||
installPhase = "touch $out";
|
||||
};
|
||||
|
||||
meta = {
|
||||
platforms = [ "x86_64-linux" ];
|
||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||
};
|
||||
})
|
||||
|
||||
@@ -43,7 +43,7 @@ stdenv.mkDerivation rec {
|
||||
|
||||
configureFlags = [
|
||||
"--with-infiniband=${rdma-core-all}"
|
||||
"--with-mpi=yes" # fixes mpi detection when cross-compiling
|
||||
"--with-mpi=yes"
|
||||
"--with-slurm"
|
||||
"CFLAGS=-fPIC"
|
||||
"CXXFLAGS=-fPIC"
|
||||
@@ -70,6 +70,6 @@ stdenv.mkDerivation rec {
|
||||
maintainers = with lib.maintainers.bsc; [ rarias ];
|
||||
platforms = lib.platforms.linux;
|
||||
license = lib.licenses.gpl3Plus;
|
||||
cross = false; # infiniband detection does not work
|
||||
cross = false;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
, zlib
|
||||
, autoPatchelfHook
|
||||
, libfabric
|
||||
, gcc
|
||||
, gcc13
|
||||
, wrapCCWith
|
||||
}:
|
||||
|
||||
@@ -33,6 +33,8 @@ let
|
||||
maintainers = with lib.maintainers.bsc; [ abonerib ];
|
||||
};
|
||||
|
||||
gcc = gcc13;
|
||||
|
||||
v = {
|
||||
hpckit = "2023.1.0";
|
||||
compiler = "2023.1.0";
|
||||
|
||||
@@ -27,10 +27,10 @@ let
|
||||
# We need to replace the lld linker from bintools with our linker just built,
|
||||
# otherwise we run into incompatibility issues when mixing compiler and linker
|
||||
# versions.
|
||||
bintools-unwrapped = llvmPackages_latest.bintools-unwrapped.override {
|
||||
bintools-unwrapped = llvmPackages_latest.tools.bintools-unwrapped.override {
|
||||
lld = clangOmpss2Unwrapped;
|
||||
};
|
||||
bintools = llvmPackages_latest.bintools.override {
|
||||
bintools = llvmPackages_latest.tools.bintools.override {
|
||||
bintools = bintools-unwrapped;
|
||||
};
|
||||
targetConfig = stdenv.targetPlatform.config;
|
||||
|
||||
@@ -65,7 +65,6 @@ stdenv.mkDerivation rec {
|
||||
];
|
||||
|
||||
meta = {
|
||||
broken = true;
|
||||
homepage = "https://github.com/bsc-pm/mcxx";
|
||||
description = "C/C++/Fortran source-to-source compilation infrastructure aimed at fast prototyping";
|
||||
maintainers = with lib.maintainers.bsc; [ rpenacob ];
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
{ python3Packages, lib }:
|
||||
|
||||
python3Packages.buildPythonApplication {
|
||||
python3Packages.buildPythonApplication rec {
|
||||
pname = "meteocat-exporter";
|
||||
version = "1.0";
|
||||
|
||||
pyproject = true;
|
||||
|
||||
src = ./.;
|
||||
|
||||
doCheck = false;
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
# For each arch
|
||||
, enableFortran ? stdenv.hostPlatform == stdenv.buildPlatform
|
||||
, perl
|
||||
, targetPackages
|
||||
}:
|
||||
|
||||
let
|
||||
@@ -46,10 +45,10 @@ in mpich.overrideAttrs (old: {
|
||||
];
|
||||
|
||||
preFixup = ''
|
||||
sed -i 's:^CC=.*:CC=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}cc:' $out/bin/mpicc
|
||||
sed -i 's:^CXX=.*:CXX=${targetPackages.stdenv.cc}/bin/${targetPackages.stdenv.cc.targetPrefix}c++:' $out/bin/mpicxx
|
||||
sed -i 's:^CC=.*:CC=''${CC:-gcc}:' $out/bin/mpicc
|
||||
sed -i 's:^CXX=.*:CXX=''${CXX:-g++}:' $out/bin/mpicxx
|
||||
'' + lib.optionalString enableFortran ''
|
||||
sed -i 's:^FC=.*:FC=${targetPackages.gfortran or gfortran}/bin/${targetPackages.gfortran.targetPrefix or gfortran.targetPrefix}gfortran:' $out/bin/mpifort
|
||||
sed -i 's:^FC=.*:FC=''${FC:-gfortran}: $out/bin/mpifort
|
||||
'';
|
||||
|
||||
hardeningDisable = [ "all" ];
|
||||
|
||||
@@ -32,11 +32,6 @@ stdenv.mkDerivation rec {
|
||||
"CXX=mpicxx"
|
||||
];
|
||||
|
||||
env = {
|
||||
MPICH_CC="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}cc";
|
||||
MPICH_CXX="${stdenv.cc}/bin/${stdenv.cc.targetPrefix}c++";
|
||||
};
|
||||
|
||||
postInstall = ''
|
||||
mkdir -p $out/bin
|
||||
for f in $(find $out -executable -type f); do
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
, paraverKernel
|
||||
, openssl
|
||||
, glibcLocales
|
||||
, wrapGAppsHook3
|
||||
, wrapGAppsHook
|
||||
}:
|
||||
|
||||
let
|
||||
@@ -64,7 +64,7 @@ stdenv.mkDerivation rec {
|
||||
autoconf
|
||||
automake
|
||||
autoreconfHook
|
||||
wrapGAppsHook3
|
||||
wrapGAppsHook
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
{ python3Packages, lib }:
|
||||
|
||||
python3Packages.buildPythonApplication {
|
||||
python3Packages.buildPythonApplication rec {
|
||||
pname = "upc-qaire-exporter";
|
||||
version = "1.0";
|
||||
|
||||
pyproject = true;
|
||||
|
||||
src = ./.;
|
||||
|
||||
doCheck = false;
|
||||
|
||||
Reference in New Issue
Block a user