Monitor GPFS scratch partition

Monitor tent node
Monitor GPFS home and projects partitions
2025-07-11 10:20:08 +02:00 · 2025-07-11 10:20:08 +02:00 · 2025-07-11 10:20:08 +02:00
63 changed files with 534 additions and 1662 deletions
--- a/flake.lock
+++ b/flake.lock
@@ -10,11 +10,11 @@
        "systems": "systems"
      },
      "locked": {
-        "lastModified": 1750173260,
-        "narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
+        "lastModified": 1723293904,
+        "narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=",
        "owner": "ryantm",
        "repo": "agenix",
-        "rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
+        "rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41",
        "type": "github"
      },
      "original": {
@@ -30,11 +30,11 @@
        ]
      },
      "locked": {
-        "lastModified": 1749650500,
-        "narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=",
+        "lastModified": 1732868163,
+        "narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=",
        "ref": "refs/heads/master",
-        "rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405",
-        "revCount": 961,
+        "rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f",
+        "revCount": 952,
        "type": "git",
        "url": "https://git.sr.ht/~rodarima/bscpkgs"
      },
@@ -51,11 +51,11 @@
        ]
      },
      "locked": {
-        "lastModified": 1744478979,
-        "narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=",
+        "lastModified": 1700795494,
+        "narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
        "owner": "lnl7",
        "repo": "nix-darwin",
-        "rev": "43975d782b418ebf4969e9ccba82466728c2851b",
+        "rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
        "type": "github"
      },
      "original": {
@@ -73,11 +73,11 @@
        ]
      },
      "locked": {
-        "lastModified": 1745494811,
-        "narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=",
+        "lastModified": 1703113217,
+        "narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
        "owner": "nix-community",
        "repo": "home-manager",
-        "rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be",
+        "rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
        "type": "github"
      },
      "original": {
@@ -88,16 +88,16 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1752436162,
-        "narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
+        "lastModified": 1736867362,
+        "narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
+        "rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
-        "ref": "nixos-25.05",
+        "ref": "nixos-24.11",
        "repo": "nixpkgs",
        "type": "github"
      }
--- a/flake.nix
+++ b/flake.nix
@@ -1,6 +1,6 @@
 {
  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
    agenix.url = "github:ryantm/agenix";
    agenix.inputs.nixpkgs.follows = "nixpkgs";
    bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
@@ -27,8 +27,6 @@ in
      lake2   = mkConf "lake2";
      raccoon = mkConf "raccoon";
      fox     = mkConf "fox";
-      apex    = mkConf "apex";
-      weasel  = mkConf "weasel";
    };

    packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
--- a/keys.nix
+++ b/keys.nix
@@ -2,27 +2,25 @@
 # here all the public keys
 rec {
  hosts = {
-    hut    = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
-    owl1   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
-    owl2   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
-    eudy   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
-    koro   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
-    bay    = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
-    lake2  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
-    fox    = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
-    tent   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
-    apex   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
-    weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
+    hut   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
+    owl1  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
+    owl2  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
+    eudy  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
+    koro  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
+    bay   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
+    lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
+    fox   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
+    tent  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
  };

  hostGroup = with hosts; rec {
-    compute    = [ owl1 owl2 fox ];
-    playground = [ eudy koro weasel ];
+    untrusted  = [ fox ];
+    compute    = [ owl1 owl2 ];
+    playground = [ eudy koro ];
    storage    = [ bay lake2 ];
    monitor    = [ hut ];
-    login      = [ apex ];

-    system     = storage ++ monitor ++ login;
+    system     = storage ++ monitor;
    safe       = system ++ compute;
    all        = safe ++ playground;
  };
@@ -30,7 +28,6 @@ rec {
  admins = {
    "rarias@hut"  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
    "rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
-    "rarias@fox"  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
    root          = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
  };
 }
--- a/m/apex/configuration.nix
+++ b/m/apex/configuration.nix
@@ -1,85 +0,0 @@
-{ lib, config, pkgs, ... }:
-
-{
-  imports = [
-    ../common/xeon.nix
-    ../common/ssf/hosts.nix
-    ../module/ceph.nix
-    ../module/slurm-server.nix
-    ./nfs.nix
-    ./wireguard.nix
-  ];
-
-  # Don't install grub MBR for now
-  boot.loader.grub.device = "nodev";
-
-  boot.initrd.kernelModules = [
-    "megaraid_sas" # For HW RAID
-  ];
-
-  environment.systemPackages = with pkgs; [
-    storcli # To manage HW RAID
-  ];
-
-  fileSystems."/home" = {
-    device = "/dev/disk/by-label/home";
-    fsType = "ext4";
-  };
-
-  # No swap, there is plenty of RAM
-  swapDevices = lib.mkForce [];
-
-  networking = {
-    hostName = "apex";
-    defaultGateway = "84.88.53.233";
-    nameservers = [ "8.8.8.8" ];
-
-    # Public facing interface
-    interfaces.eno1.ipv4.addresses = [ {
-      address = "84.88.53.236";
-      prefixLength = 29;
-    } ];
-
-    # Internal LAN to our Ethernet switch
-    interfaces.eno2.ipv4.addresses = [ {
-      address = "10.0.40.30";
-      prefixLength = 24;
-    } ];
-
-    # Infiniband over Omnipath switch (disconnected for now)
-    # interfaces.ibp5s0 = {};
-
-    nat = {
-      enable = true;
-      internalInterfaces = [ "eno2" ];
-      externalInterface = "eno1";
-    };
-  };
-
-  # Use SSH tunnel to reach internal hosts
-  programs.ssh.extraConfig = ''
-    Host bscpm04.bsc.es gitlab-internal.bsc.es knights3.bsc.es
-      ProxyCommand nc -X connect -x localhost:23080 %h %p
-    Host raccoon
-      HostName knights3.bsc.es
-      ProxyCommand nc -X connect -x localhost:23080 %h %p
-    Host tent
-      ProxyJump raccoon
-  '';
-
-  networking.firewall = {
-    extraCommands = ''
-      # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
-      # logs. Insert as first position so we also protect SSH.
-      iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
-      # Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
-      iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
-    '';
-  };
-
-  # Use tent for cache
-  nix.settings = {
-    extra-substituters = [ "https://jungle.bsc.es/cache" ];
-    extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
-  };
-}
--- a/m/apex/nfs.nix
+++ b/m/apex/nfs.nix
@@ -1,48 +0,0 @@
-{ ... }:
-
-{
-  services.nfs.server = {
-    enable = true;
-    lockdPort = 4001;
-    mountdPort = 4002;
-    statdPort = 4000;
-    exports = ''
-      /home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
-      /home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
-    '';
-  };
-  networking.firewall = {
-    # Check with `rpcinfo -p`
-    extraCommands = ''
-      # Accept NFS traffic from compute nodes but not from the outside
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
-      # Same but UDP
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
-
-      # Accept NFS traffic from wg0
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
-      # Same but UDP
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
-    '';
-  };
-}
--- a/m/apex/wireguard.nix
+++ b/m/apex/wireguard.nix
@@ -1,35 +0,0 @@
-{ config, ... }:
-
-{
-  networking.firewall = {
-    allowedUDPPorts = [ 666 ];
-  };
-
-  age.secrets.wgApex.file = ../../secrets/wg-apex.age;
-
-  # Enable WireGuard
-  networking.wireguard.enable = true;
-  networking.wireguard.interfaces = {
-    # "wg0" is the network interface name. You can name the interface arbitrarily.
-    wg0 = {
-      ips = [ "10.106.0.30/24" ];
-      listenPort = 666;
-      privateKeyFile = config.age.secrets.wgApex.path;
-      # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
-      peers = [
-        {
-          name = "Fox";
-          publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
-          allowedIPs = [ "10.106.0.0/24" ];
-          endpoint = "fox.ac.upc.edu:666";
-          # Send keepalives every 25 seconds. Important to keep NAT tables alive.
-          persistentKeepalive = 25;
-        }
-      ];
-    };
-  };
-
-  networking.hosts = {
-    "10.106.0.1" = [ "fox" ];
-  };
-}
--- a/m/common/base.nix
+++ b/m/common/base.nix
@@ -3,7 +3,6 @@
  # Includes the basic configuration for an Intel server.
  imports = [
    ./base/agenix.nix
-    ./base/always-power-on.nix
    ./base/august-shutdown.nix
    ./base/boot.nix
    ./base/env.nix
--- a/m/common/base/always-power-on.nix
+++ b/m/common/base/always-power-on.nix
@@ -1,8 +0,0 @@
-{
-  imports = [
-    ../../module/power-policy.nix
-  ];
-
-  # Turn on as soon as we have power
-  power.policy = "always-on";
-}
--- a/m/common/base/august-shutdown.nix
+++ b/m/common/base/august-shutdown.nix
@@ -1,12 +1,12 @@
 {
-  # Shutdown all machines on August 3rd at 22:00, so we can protect the
+  # Shut down all machines on August 2nd at 11:00 AM, so we can protect the
  # hardware from spurious electrical peaks on the yearly electrical cut for
  # manteinance that starts on August 4th.
  systemd.timers.august-shutdown = {
-    description = "Shutdown on August 3rd for maintenance";
+    description = "Shutdown on August 2nd for maintenance";
    wantedBy = [ "timers.target" ];
    timerConfig = {
-      OnCalendar = "*-08-03 22:00:00";
+      OnCalendar = "*-08-02 11:00:00";
      RandomizedDelaySec = "10min";
      Unit = "systemd-poweroff.service";
    };
--- a/m/common/base/env.nix
+++ b/m/common/base/env.nix
@@ -3,8 +3,8 @@
 {
  environment.systemPackages = with pkgs; [
    vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
-    nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
-    ncdu config.boot.kernelPackages.perf ldns pv
+    nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
+    ncdu config.boot.kernelPackages.perf ldns
    # From bsckgs overlay
    osumb
  ];
--- a/m/common/base/net.nix
+++ b/m/common/base/net.nix
@@ -1,4 +1,4 @@
-{ pkgs, lib, ... }:
+{ pkgs, ... }:

 {
  networking = {
@@ -10,9 +10,6 @@
      allowedTCPPorts = [ 22 ];
    };

-    # Make sure we use iptables
-    nftables.enable = lib.mkForce false;
-
    hosts = {
      "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
      "84.88.51.152" = [ "raccoon" ];
--- a/m/common/base/nix.nix
+++ b/m/common/base/nix.nix
@@ -6,8 +6,6 @@
    (import ../../../pkgs/overlay.nix)
  ];

-  nixpkgs.config.allowUnfree = true;
-
  nix = {
    nixPath = [
      "nixpkgs=${nixpkgs}"
--- a/m/common/base/users.nix
+++ b/m/common/base/users.nix
@@ -56,7 +56,7 @@
        home = "/home/Computational/rpenacob";
        description = "Raúl Peñacoba";
        group = "Computational";
-        hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ];
+        hosts = [ "owl1" "owl2" "hut" "tent" "fox" ];
        hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
@@ -69,10 +69,10 @@
        home = "/home/Computational/anavarro";
        description = "Antoni Navarro";
        group = "Computational";
-        hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ];
-        hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1";
+        hosts = [ "hut" "tent" "raccoon" "fox" ];
+        hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
        openssh.authorizedKeys.keys = [
-          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni"
+          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
        ];
      };

@@ -82,7 +82,7 @@
        home = "/home/Computational/abonerib";
        description = "Aleix Boné";
        group = "Computational";
-        hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ];
+        hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" "fox" ];
        hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
@@ -95,7 +95,7 @@
        home = "/home/Computational/vlopez";
        description = "Victor López";
        group = "Computational";
-        hosts = [ "apex" "koro" ];
+        hosts = [ "koro" ];
        hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
@@ -108,7 +108,7 @@
        home = "/home/Computational/dbautist";
        description = "Dylan Bautista Cases";
        group = "Computational";
-        hosts = [ "apex" "hut" "tent" "raccoon" ];
+        hosts = [ "hut" "tent" "raccoon" ];
        hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
@@ -121,7 +121,7 @@
        home = "/home/Computational/dalvare1";
        description = "David Álvarez";
        group = "Computational";
-        hosts = [ "apex" "hut" "tent" "fox" ];
+        hosts = [ "hut" "tent" "fox" ];
        hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
@@ -134,7 +134,7 @@
        home = "/home/Computational/varcila";
        description = "Vincent Arcila";
        group = "Computational";
-        hosts = [ "apex" "hut" "tent" "fox" ];
+        hosts = [ "hut" "tent" "fox" ];
        hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
        openssh.authorizedKeys.keys = [
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
@@ -154,20 +154,6 @@
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
        ];
      };
-
-      csiringo = {
-        # Arbitrary UID but large so it doesn't collide with other users on ssfhead.
-        uid = 9653;
-        isNormalUser = true;
-        home = "/home/Computational/csiringo";
-        description = "Cesare Siringo";
-        group = "Computational";
-        hosts = [ "apex" "weasel" ];
-        hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
-        openssh.authorizedKeys.keys = [
-          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
-        ];
-      };
    };

    groups = {
--- a/m/common/ssf.nix
+++ b/m/common/ssf.nix
@@ -3,7 +3,6 @@
  imports = [
    ./xeon.nix
    ./ssf/fs.nix
-    ./ssf/hosts.nix
    ./ssf/net.nix
    ./ssf/ssh.nix
  ];
--- a/m/common/ssf/hosts.nix
+++ b/m/common/ssf/hosts.nix
@@ -1,23 +0,0 @@
-{ pkgs, ... }:
-
-{
-  networking.hosts = {
-    # Login
-    "10.0.40.30" = [ "apex" ];
-
-    # Storage
-    "10.0.40.40" = [ "bay" ];   "10.0.42.40" = [ "bay-ib" ];    "10.0.40.141" = [ "bay-ipmi" ];
-    "10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
-    "10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ];  "10.0.40.143" = [ "lake2-ipmi" ];
-
-    # Xeon compute
-    "10.0.40.1" = [ "owl1" ];   "10.0.42.1" = [ "owl1-ib" ];   "10.0.40.101" = [ "owl1-ipmi" ];
-    "10.0.40.2" = [ "owl2" ];   "10.0.42.2" = [ "owl2-ib" ];   "10.0.40.102" = [ "owl2-ipmi" ];
-    "10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
-    #"10.0.40.4" = [ "tent" ];   "10.0.42.4" = [ "tent-ib" ];   "10.0.40.104" = [ "tent-ipmi" ];
-    "10.0.40.5" = [ "koro" ];   "10.0.42.5" = [ "koro-ib" ];   "10.0.40.105" = [ "koro-ipmi" ];
-    "10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
-    "10.0.40.7" = [ "hut" ];    "10.0.42.7" = [ "hut-ib" ];    "10.0.40.107" = [ "hut-ipmi" ];
-    "10.0.40.8" = [ "eudy" ];   "10.0.42.8" = [ "eudy-ib" ];   "10.0.40.108" = [ "eudy-ipmi" ];
-  };
-}
--- a/m/common/ssf/net.nix
+++ b/m/common/ssf/net.nix
@@ -9,6 +9,14 @@
    defaultGateway = "10.0.40.30";
    nameservers = ["8.8.8.8"];

+    proxy = {
+      default = "http://hut:23080/";
+      noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut";
+      # Don't set all_proxy, as Go complains and breaks the GitLab runner; see:
+      # https://github.com/golang/go/issues/16715
+      allProxy = null;
+    };
+
    firewall = {
      extraCommands = ''
        # Prevent ssfhead from contacting our slurmd daemon
@@ -19,5 +27,64 @@
        iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
      '';
    };
+
+    extraHosts = ''
+      10.0.40.30              ssfhead
+      
+      # Node Entry for node: mds01 (ID=72)
+      10.0.40.40              bay mds01 mds01-eth0
+      10.0.42.40              bay-ib mds01-ib0
+      10.0.40.141             bay-ipmi mds01-ipmi0 mds01-ipmi
+      
+      # Node Entry for node: oss01 (ID=73)
+      10.0.40.41              oss01 oss01-eth0
+      10.0.42.41              oss01-ib0
+      10.0.40.142             oss01-ipmi0 oss01-ipmi
+      
+      # Node Entry for node: oss02 (ID=74)
+      10.0.40.42              lake2 oss02 oss02-eth0
+      10.0.42.42              lake2-ib oss02-ib0
+      10.0.40.143             lake2-ipmi oss02-ipmi0 oss02-ipmi
+      
+      # Node Entry for node: xeon01 (ID=15)
+      10.0.40.1               owl1 xeon01 xeon01-eth0
+      10.0.42.1               owl1-ib xeon01-ib0
+      10.0.40.101             owl1-ipmi xeon01-ipmi0 xeon01-ipmi
+      
+      # Node Entry for node: xeon02 (ID=16)
+      10.0.40.2               owl2 xeon02 xeon02-eth0
+      10.0.42.2               owl2-ib xeon02-ib0
+      10.0.40.102             owl2-ipmi xeon02-ipmi0 xeon02-ipmi
+      
+      # Node Entry for node: xeon03 (ID=17)
+      10.0.40.3               xeon03 xeon03-eth0
+      10.0.42.3               xeon03-ib0
+      10.0.40.103             xeon03-ipmi0 xeon03-ipmi
+      
+      # Node Entry for node: xeon04 (ID=18)
+      10.0.40.4               xeon04 xeon04-eth0
+      10.0.42.4               xeon04-ib0
+      10.0.40.104             xeon04-ipmi0 xeon04-ipmi
+      
+      # Node Entry for node: xeon05 (ID=19)
+      10.0.40.5               koro xeon05 xeon05-eth0
+      10.0.42.5               koro-ib xeon05-ib0
+      10.0.40.105             koro-ipmi xeon05-ipmi0
+      
+      # Node Entry for node: xeon06 (ID=20)
+      10.0.40.6               xeon06 xeon06-eth0
+      10.0.42.6               xeon06-ib0
+      10.0.40.106             xeon06-ipmi0 xeon06-ipmi
+      
+      # Node Entry for node: xeon07 (ID=21)
+      10.0.40.7               hut xeon07 xeon07-eth0
+      10.0.42.7               hut-ib xeon07-ib0
+      10.0.40.107             hut-ipmi xeon07-ipmi0 xeon07-ipmi
+      
+      # Node Entry for node: xeon08 (ID=22)
+      10.0.40.8               eudy xeon08 xeon08-eth0
+      10.0.42.8               eudy-ib xeon08-ib0
+      10.0.40.108             eudy-ipmi xeon08-ipmi0 xeon08-ipmi
+    '';
  };
 }
--- a/m/common/ssf/ssh.nix
+++ b/m/common/ssf/ssh.nix
@@ -1,16 +1,8 @@
 {
-  # Use SSH tunnel to apex to reach internal hosts
+  # Connect to BSC login nodes via the HTTP proxy on hut
  programs.ssh.extraConfig = ''
-    Host tent
-      ProxyJump raccoon
-
-    # Access raccoon via the HTTP proxy
-    Host raccoon knights3.bsc.es
-      HostName knights3.bsc.es
-      ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
-
-    # Make sure we can reach gitlab even if we don't have SSH access to raccoon
-    Host bscpm04.bsc.es gitlab-internal.bsc.es
-      ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
+    # Connect to BSC machines via hut proxy too
+    Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
+      ProxyCommand nc -X connect -x hut:23080 %h %p
  '';
 }
--- a/m/fox/configuration.nix
+++ b/m/fox/configuration.nix
@@ -4,17 +4,9 @@
  imports = [
    ../common/base.nix
    ../common/xeon/console.nix
-    ../module/amd-uprof.nix
    ../module/emulation.nix
-    ../module/nvidia.nix
-    ../module/slurm-client.nix
-    ./wireguard.nix
  ];

-  # Don't turn off on August as UPC has different dates.
-  # Fox works fine on power cuts.
-  systemd.timers.august-shutdown.enable = false;
-
  # Select the this using the ID to avoid mismatches
  boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";

@@ -30,31 +22,14 @@
  # Use performance for benchmarks
  powerManagement.cpuFreqGovernor = "performance";

-  services.amd-uprof.enable = true;
-
  # Disable NUMA balancing
  boot.kernel.sysctl."kernel.numa_balancing" = 0;

  # Expose kernel addresses
  boot.kernel.sysctl."kernel.kptr_restrict" = 0;

-  # Disable NMI watchdog to save one hw counter (for AMD uProf)
-  boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
-
  services.openssh.settings.X11Forwarding = true;

-  services.fail2ban.enable = true;
-
-  # Use SSH tunnel to reach internal hosts
-  programs.ssh.extraConfig = ''
-    Host bscpm04.bsc.es gitlab-internal.bsc.es tent
-      ProxyJump raccoon
-    Host raccoon
-      ProxyJump apex
-      HostName 127.0.0.1
-      Port 22022
-  '';
-
  networking = {
    timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
    hostName = "fox";
@@ -78,20 +53,17 @@
    extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
  };

-  # Recommended for new graphics cards
-  hardware.nvidia.open = true;
+  # Configure Nvidia driver to use with CUDA
+  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
+  hardware.graphics.enable = true;
+  nixpkgs.config.allowUnfree = true;
+  nixpkgs.config.nvidia.acceptLicense = true;
+  services.xserver.videoDrivers = [ "nvidia" ];

  # Mount NVME disks
  fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
  fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };

-  # Mount the NFS home
-  fileSystems."/nfs/home" = {
-    device = "10.106.0.30:/home";
-    fsType = "nfs";
-    options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
-  };
-
  # Make a /nvme{0,1}/$USER directory for each user.
  systemd.services.create-nvme-dirs = let
    # Take only normal users in fox
@@ -108,20 +80,4 @@
    wantedBy = [ "multi-user.target" ];
    serviceConfig.ExecStart = script;
  };
-
-  # Only allow SSH connections from users who have a SLURM allocation
-  # See: https://slurm.schedmd.com/pam_slurm_adopt.html
-  security.pam.services.sshd.rules.account.slurm = {
-    control = "required";
-    enable = true;
-    modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
-    args = [ "log_level=debug5" ];
-    order = 999999; # Make it last one
-  };
-
-  # Disable systemd session (pam_systemd.so) as it will conflict with the
-  # pam_slurm_adopt.so module. What happens is that the shell is first adopted
-  # into the slurmstepd task and then into the systemd session, which is not
-  # what we want, otherwise it will linger even if all jobs are gone.
-  security.pam.services.sshd.startSession = lib.mkForce false;
 }
--- a/m/fox/wireguard.nix
+++ b/m/fox/wireguard.nix
@@ -1,46 +0,0 @@
-{ config, ... }:
-
-{
-  networking.firewall = {
-    allowedUDPPorts = [ 666 ];
-  };
-
-  age.secrets.wgFox.file = ../../secrets/wg-fox.age;
-
-  networking.wireguard.enable = true;
-  networking.wireguard.interfaces = {
-    # "wg0" is the network interface name. You can name the interface arbitrarily.
-    wg0 = {
-      # Determines the IP address and subnet of the server's end of the tunnel interface.
-      ips = [ "10.106.0.1/24" ];
-
-      # The port that WireGuard listens to. Must be accessible by the client.
-      listenPort = 666;
-
-      # Path to the private key file.
-      privateKeyFile = config.age.secrets.wgFox.path;
-      # Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
-
-      peers = [
-        # List of allowed peers.
-        { 
-          name = "Apex";
-          publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
-          # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
-          allowedIPs = [ "10.106.0.30/32" ];
-        }
-      ];
-    };
-  };
-
-  networking.hosts = {
-    "10.106.0.30" = [ "apex" ];
-  };
-
-  networking.firewall = {
-    extraCommands = ''
-      # Accept slurm connections to slurmd from apex (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
-    '';
-  };
-}
--- a/m/hut/blackbox.yml
+++ b/m/hut/blackbox.yml
@@ -3,12 +3,160 @@ modules:
    prober: http
    timeout: 5s
    http:
+      proxy_url: "http://127.0.0.1:23080"
+      skip_resolve_phase_with_proxy: true
      follow_redirects: true
-      preferred_ip_protocol: "ip4"
      valid_status_codes: []  # Defaults to 2xx
      method: GET
+  http_with_proxy:
+    prober: http
+    http:
+      proxy_url: "http://127.0.0.1:3128"
+      skip_resolve_phase_with_proxy: true
+  http_with_proxy_and_headers:
+    prober: http
+    http:
+      proxy_url: "http://127.0.0.1:3128"
+      proxy_connect_header:
+        Proxy-Authorization:
+          - Bearer token
+  http_post_2xx:
+    prober: http
+    timeout: 5s
+    http:
+      method: POST
+      headers:
+        Content-Type: application/json
+      body: '{}'
+  http_post_body_file:
+    prober: http
+    timeout: 5s
+    http:
+      method: POST
+      body_file: "/files/body.txt"
+  http_basic_auth_example:
+    prober: http
+    timeout: 5s
+    http:
+      method: POST
+      headers:
+        Host: "login.example.com"
+      basic_auth:
+        username: "username"
+        password: "mysecret"
+  http_2xx_oauth_client_credentials:
+    prober: http
+    timeout: 5s
+    http:
+      valid_http_versions: ["HTTP/1.1", "HTTP/2"]
+      follow_redirects: true
+      preferred_ip_protocol: "ip4"
+      valid_status_codes:
+        - 200
+        - 201
+      oauth2:
+        client_id: "client_id"
+        client_secret: "client_secret"
+        token_url: "https://api.example.com/token"
+        endpoint_params:
+          grant_type: "client_credentials"
+  http_custom_ca_example:
+    prober: http
+    http:
+      method: GET
+      tls_config:
+        ca_file: "/certs/my_cert.crt"
+  http_gzip:
+    prober: http
+    http:
+      method: GET
+      compression: gzip
+  http_gzip_with_accept_encoding:
+    prober: http
+    http:
+      method: GET
+      compression: gzip
+      headers:
+        Accept-Encoding: gzip
+  tls_connect:
+    prober: tcp
+    timeout: 5s
+    tcp:
+      tls: true
+  tcp_connect_example:
+    prober: tcp
+    timeout: 5s
+  imap_starttls:
+    prober: tcp
+    timeout: 5s
+    tcp:
+      query_response:
+        - expect: "OK.*STARTTLS"
+        - send: ". STARTTLS"
+        - expect: "OK"
+        - starttls: true
+        - send: ". capability"
+        - expect: "CAPABILITY IMAP4rev1"
+  smtp_starttls:
+    prober: tcp
+    timeout: 5s
+    tcp:
+      query_response:
+        - expect: "^220 ([^ ]+) ESMTP (.+)$"
+        - send: "EHLO prober\r"
+        - expect: "^250-STARTTLS"
+        - send: "STARTTLS\r"
+        - expect: "^220"
+        - starttls: true
+        - send: "EHLO prober\r"
+        - expect: "^250-AUTH"
+        - send: "QUIT\r"
+  irc_banner_example:
+    prober: tcp
+    timeout: 5s
+    tcp:
+      query_response:
+        - send: "NICK prober"
+        - send: "USER prober prober prober :prober"
+        - expect: "PING :([^ ]+)"
+          send: "PONG ${1}"
+        - expect: "^:[^ ]+ 001"
  icmp:
    prober: icmp
    timeout: 5s
    icmp:
      preferred_ip_protocol: "ip4"
+  dns_udp_example:
+    prober: dns
+    timeout: 5s
+    dns:
+      query_name: "www.prometheus.io"
+      query_type: "A"
+      valid_rcodes:
+        - NOERROR
+      validate_answer_rrs:
+        fail_if_matches_regexp:
+          - ".*127.0.0.1"
+        fail_if_all_match_regexp:
+          - ".*127.0.0.1"
+        fail_if_not_matches_regexp:
+          - "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
+        fail_if_none_matches_regexp:
+          - "127.0.0.1"
+      validate_authority_rrs:
+        fail_if_matches_regexp:
+          - ".*127.0.0.1"
+      validate_additional_rrs:
+        fail_if_matches_regexp:
+          - ".*127.0.0.1"
+  dns_soa:
+    prober: dns
+    dns:
+      query_name: "prometheus.io"
+      query_type: "SOA"
+  dns_tcp_example:
+    prober: dns
+    dns:
+      transport_protocol: "tcp" # defaults to "udp"
+      preferred_ip_protocol: "ip4" # defaults to "ip6"
+      query_name: "www.prometheus.io"
--- a/m/hut/configuration.nix
+++ b/m/hut/configuration.nix
@@ -7,9 +7,11 @@
    ../module/ceph.nix
    ../module/debuginfod.nix
    ../module/emulation.nix
+    ../module/slurm-client.nix
    ./gitlab-runner.nix
    ./monitoring.nix
    ./nfs.nix
+    ./slurm-server.nix
    ./nix-serve.nix
    ./public-inbox.nix
    ./gitea.nix
--- a/m/hut/gpfs-probe.sh
+++ b/m/hut/gpfs-probe.sh
@@ -2,10 +2,20 @@

 N=500

-t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
+t_proj=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
+t_scratch=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/scratch/bsc15/rodrigo/probe/gpfs.{1..$N} 2>&1; rm -f /gpfs/scratch/bsc15/rodrigo/probe/gpfs.{1..$N}")
+t_home=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /home/bsc/bsc015557/.gpfs/{1..$N} 2>&1; rm -f /home/bsc/bsc015557/.gpfs/{1..$N}")

-if [ -z "$t" ]; then
-  t="5.00"
+if [ -z "$t_proj" ]; then
+  t_proj="5.00"
+fi
+
+if [ -z "$t_scratch" ]; then
+  t_scratch="5.00"
+fi
+
+if [ -z "$t_home" ]; then
+  t_home="5.00"
 fi

 cat <<EOF
@@ -14,5 +24,7 @@ Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values

 # HELP gpfs_touch_latency Time to create $N files.
 # TYPE gpfs_touch_latency gauge
-gpfs_touch_latency $t
+gpfs_touch_latency{partition="projects"} $t_proj
+gpfs_touch_latency{partition="home"} $t_home
+gpfs_touch_latency{partition="scratch"} $t_scratch
 EOF
--- a/m/hut/monitoring.nix
+++ b/m/hut/monitoring.nix
@@ -267,6 +267,14 @@
          }
        ];
      }
+      {
+        job_name = "tent";
+        static_configs = [
+          {
+            targets = [ "127.0.0.1:29002" ]; # Node exporter
+          }
+        ];
+      }
    ];
  };
 }
--- a/m/hut/slurm-server.nix
+++ b/m/hut/slurm-server.nix
@@ -0,0 +1,7 @@
+{ ... }:
+
+{
+  services.slurm = {
+    server.enable = true;
+  };
+}
--- a/m/hut/targets.yml
+++ b/m/hut/targets.yml
@@ -4,7 +4,7 @@
  - xeon03-ipmi
  - xeon04-ipmi
  - koro-ipmi
-  - weasel-ipmi
+  - xeon06-ipmi
  - hut-ipmi
  - eudy-ipmi
  # Storage
--- a/m/map.nix
+++ b/m/map.nix
@@ -6,7 +6,7 @@
    switch-opa = { pos=41; size=1; };

    # SSF login
-    apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
+    ssfhead = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="operations@bsc.es"; };

    # Storage
    bay   = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
@@ -19,7 +19,7 @@
    xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
    # Slot 34 empty
    koro   = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
-    weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
+    xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
    hut    = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
    eudy   = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };

--- a/m/module/amd-uprof.nix
+++ b/m/module/amd-uprof.nix
@@ -1,49 +0,0 @@
-{ config, lib, pkgs, ... }:
-
-{
-  options = {
-    services.amd-uprof = {
-      enable = lib.mkOption {
-        type = lib.types.bool;
-        default = false;
-        description = "Whether to enable AMD uProf.";
-      };
-    };
-  };
-
-  # Only setup amd-uprof if enabled
-  config = lib.mkIf config.services.amd-uprof.enable {
-
-    # First make sure that we add the module to the list of available modules
-    # in the kernel matching the same kernel version of this configuration.
-    boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ];
-    boot.kernelModules = [ "AMDPowerProfiler" ];
-
-    # Make the userspace tools available in $PATH.
-    environment.systemPackages = with pkgs; [ amd-uprof ];
-
-    # The AMDPowerProfiler module doesn't create the /dev device nor it emits
-    # any uevents, so we cannot use udev rules to automatically create the
-    # device. Instead, we run a systemd unit that does it after loading the
-    # modules.
-    systemd.services.amd-uprof-device = {
-      description = "Create /dev/AMDPowerProfiler device";
-      after = [ "systemd-modules-load.service" ];
-      wantedBy = [ "multi-user.target" ];
-      unitConfig.ConditionPathExists = [
-          "/proc/AMDPowerProfiler/device"
-          "!/dev/AMDPowerProfiler"
-      ];
-      serviceConfig = {
-        Type = "oneshot";
-        RemainAfterExit = true;
-        ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
-          mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
-        '';
-        ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
-          rm -f /dev/AMDPowerProfiler
-        '';
-      };
-    };
-  };
-}
--- a/m/module/nvidia.nix
+++ b/m/module/nvidia.nix
@@ -1,20 +0,0 @@
-{ lib, config, pkgs, ... }:
-{
-  # Configure Nvidia driver to use with CUDA
-  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
-  hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
-  hardware.graphics.enable = true;
-  nixpkgs.config.nvidia.acceptLicense = true;
-  services.xserver.videoDrivers = [ "nvidia" ];
-
-  # enable support for derivations which require nvidia-gpu to be available
-  # > requiredSystemFeatures = [ "cuda" ];
-  programs.nix-required-mounts.enable = true;
-  programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
-  # They forgot to add the symlink
-  programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
-    config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
-  ];
-
-  environment.systemPackages = [ pkgs.cudainfo ];
-}
--- a/m/module/power-policy.nix
+++ b/m/module/power-policy.nix
@@ -1,33 +0,0 @@
-{ config, lib, pkgs, ... }:
-
-with lib;
-
-let
-  cfg = config.power.policy;
-in
-{
-  options = {
-    power.policy = mkOption {
-      type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
-      default = null;
-      description = "Set power policy to use via IPMI.";
-    };
-  };
-
-  config = mkIf (cfg != null) {
-    systemd.services."power-policy" = {
-      description = "Set power policy to use via IPMI";
-      wantedBy = [ "multi-user.target" ];
-      unitConfig = {
-        StartLimitBurst = "10";
-        StartLimitIntervalSec = "10m";
-      };
-      serviceConfig = {
-        ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
-        Type = "oneshot";
-        Restart = "on-failure";
-        RestartSec = "5s";
-      };
-    };
-  };
-}
--- a/m/module/slurm-client.nix
+++ b/m/module/slurm-client.nix
@@ -1,10 +1,33 @@
-{ lib, ... }:
+{ config, pkgs, lib, ... }:

-{
-  imports = [
-    ./slurm-common.nix
-  ];
+let
+  suspendProgram = pkgs.writeScript "suspend.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log # append all output of this script to the power-save log
+    set -x # trace every command into the log
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames "$1") # quoted: a hostlist such as owl[1,2] is also a shell glob pattern
+    for host in $hosts; do # one hostname per word, as expanded by scontrol
+      echo Shutting down host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
+    done
+  '';

+  resumeProgram = pkgs.writeScript "resume.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log # append all output of this script to the power-save log
+    set -x # trace every command into the log
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Resume invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames "$1") # quoted: a hostlist such as owl[1,2] is also a shell glob pattern
+    for host in $hosts; do # one hostname per word, as expanded by scontrol
+      echo Starting host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
+    done
+  '';
+
+in {
  systemd.services.slurmd.serviceConfig = {
    # Kill all processes in the control group on stop/restart. This will kill
    # all the jobs running, so ensure that we only upgrade when the nodes are
@@ -14,5 +37,90 @@
    KillMode = lib.mkForce "control-group";
  };

-  services.slurm.client.enable = true;
+  services.slurm = {
+    client.enable = true;
+    controlMachine = "hut";
+    clusterName = "jungle";
+    nodeName = [
+      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
+      "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
+    ];
+
+    partitionName = [
+      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
+    ];
+
+    # See slurm.conf(5) for more details about these options.
+    extraConfig = ''
+      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
+      # not with Intel MPI. For that use the compatibility shim libpmi.so
+      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
+      # library in SLURM (--mpi=pmix). See more details here:
+      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
+      MpiDefault=pmix
+
+      # When a node reboots return that node to the slurm queue as soon as it
+      # becomes operative again.
+      ReturnToService=2
+
+      # Track all processes by using a cgroup
+      ProctrackType=proctrack/cgroup
+
+      # Enable task/affinity to allow the jobs to run in a specified subset of
+      # the resources. Use the task/cgroup plugin to enable process containment.
+      TaskPlugin=task/affinity,task/cgroup
+
+      # Power off unused nodes until they are requested
+      SuspendProgram=${suspendProgram}
+      SuspendTimeout=60
+      ResumeProgram=${resumeProgram}
+      ResumeTimeout=300
+      SuspendExcNodes=hut
+
+      # Turn the nodes off after 1 hour of inactivity
+      SuspendTime=3600
+
+      # Reduce port range so we can allow only this range in the firewall
+      SrunPortRange=60000-61000
+
+      # Use cores as consumable resources. In SLURM terms, a core may have
+      # multiple hardware threads (or CPUs).
+      SelectType=select/cons_tres
+
+      # Ignore memory constraints and only use unused cores to share a node with
+      # other jobs.
+      SelectTypeParameters=CR_Core
+
+      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
+      # This sets up the "extern" step into which ssh-launched processes will be
+      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
+      # when a task runs (srun) so we can ssh early.
+      PrologFlags=Alloc,Contain,X11
+
+      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
+      # adopted by the external step, similar to tasks running in regular steps
+      # LaunchParameters=ulimit_pam_adopt
+      SlurmdDebug=debug5
+      #DebugFlags=Protocol,Cgroup
+    '';
+
+    extraCgroupConfig = ''
+      CgroupPlugin=cgroup/v2
+      #ConstrainCores=yes
+    '';
+  };
+
+  # Place the slurm config in /etc as this will be required by PAM
+  environment.etc.slurm.source = config.services.slurm.etcSlurm;
+
+  age.secrets.mungeKey = {
+    file = ../../secrets/munge-key.age;
+    owner = "munge";
+    group = "munge";
+  };
+
+  services.munge = {
+    enable = true;
+    password = config.age.secrets.mungeKey.path;
+  };
 }
--- a/m/module/slurm-common.nix
+++ b/m/module/slurm-common.nix
@@ -1,115 +0,0 @@
-{ config, pkgs, ... }:
-
-let
-  suspendProgram = pkgs.writeShellScript "suspend.sh" ''
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Shutting down host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
-    done
-  '';
-
-  resumeProgram = pkgs.writeShellScript "resume.sh" ''
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Starting host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
-    done
-  '';
-
-in {
-  services.slurm = {
-    controlMachine = "apex";
-    clusterName = "jungle";
-    nodeName = [
-      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
-      "fox       Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
-    ];
-
-    partitionName = [
-      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-      "fox Nodes=fox          Default=NO  DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-    ];
-
-    # See slurm.conf(5) for more details about these options.
-    extraConfig = ''
-      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
-      # not with Intel MPI. For that use the compatibility shim libpmi.so
-      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
-      # library in SLURM (--mpi=pmix). See more details here:
-      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
-      MpiDefault=pmix
-
-      # When a node reboots return that node to the slurm queue as soon as it
-      # becomes operative again.
-      ReturnToService=2
-
-      # Track all processes by using a cgroup
-      ProctrackType=proctrack/cgroup
-
-      # Enable task/affinity to allow the jobs to run in a specified subset of
-      # the resources. Use the task/cgroup plugin to enable process containment.
-      TaskPlugin=task/affinity,task/cgroup
-
-      # Power off unused nodes until they are requested
-      SuspendProgram=${suspendProgram}
-      SuspendTimeout=60
-      ResumeProgram=${resumeProgram}
-      ResumeTimeout=300
-      SuspendExcNodes=fox
-
-      # Turn the nodes off after 1 hour of inactivity
-      SuspendTime=3600
-
-      # Reduce port range so we can allow only this range in the firewall
-      SrunPortRange=60000-61000
-
-      # Use cores as consumable resources. In SLURM terms, a core may have
-      # multiple hardware threads (or CPUs).
-      SelectType=select/cons_tres
-
-      # Ignore memory constraints and only use unused cores to share a node with
-      # other jobs.
-      SelectTypeParameters=CR_Core
-
-      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
-      # This sets up the "extern" step into which ssh-launched processes will be
-      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
-      # when a task runs (srun) so we can ssh early.
-      PrologFlags=Alloc,Contain,X11
-
-      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
-      # adopted by the external step, similar to tasks running in regular steps
-      # LaunchParameters=ulimit_pam_adopt
-      SlurmdDebug=debug5
-      #DebugFlags=Protocol,Cgroup
-    '';
-
-    extraCgroupConfig = ''
-      CgroupPlugin=cgroup/v2
-      #ConstrainCores=yes
-    '';
-  };
-
-  # Place the slurm config in /etc as this will be required by PAM
-  environment.etc.slurm.source = config.services.slurm.etcSlurm;
-
-  age.secrets.mungeKey = {
-    file = ../../secrets/munge-key.age;
-    owner = "munge";
-    group = "munge";
-  };
-
-  services.munge = {
-    enable = true;
-    password = config.age.secrets.mungeKey.path;
-  };
-}
--- a/m/module/slurm-server.nix
+++ b/m/module/slurm-server.nix
@@ -1,23 +0,0 @@
-{ ... }:
-
-{
-  imports = [
-    ./slurm-common.nix
-  ];
-
-  services.slurm.server.enable = true;
-
-  networking.firewall = {
-    extraCommands = ''
-      # Accept slurm connections to controller from compute nodes
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
-      # Accept slurm connections from compute nodes for srun
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
-
-      # Accept slurm connections to controller from fox (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
-      # Accept slurm connections from fox for srun (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
-    '';
-  };
-}
--- a/m/module/ssh-hut-extern.nix
+++ b/m/module/ssh-hut-extern.nix
@@ -1,8 +1,9 @@
 {
  programs.ssh.extraConfig = ''
-    Host apex ssfhead
+    Host ssfhead
      HostName ssflogin.bsc.es
    Host hut
-      ProxyJump apex
+      ProxyJump ssfhead
+      HostName xeon07
  '';
 }
--- a/m/raccoon/configuration.nix
+++ b/m/raccoon/configuration.nix
@@ -6,7 +6,6 @@
    ../module/emulation.nix
    ../module/debuginfod.nix
    ../module/ssh-hut-extern.nix
-    ../module/nvidia.nix
    ../eudy/kernel/perf.nix
  ];

@@ -39,7 +38,6 @@
    };
    hosts = {
      "10.0.44.4" = [ "tent" ];
-      "84.88.53.236" = [ "apex" ];
    };
  };

@@ -51,7 +49,15 @@
  # Enable performance governor
  powerManagement.cpuFreqGovernor = "performance";

-  hardware.nvidia.open = false; # Maxwell is older than Turing architecture
+  # Configure Nvidia driver to use with CUDA
+  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
+  hardware.graphics.enable = true;
+  nixpkgs.config.allowUnfree = true;
+  nixpkgs.config.nvidia.acceptLicense = true;
+  services.xserver.videoDrivers = [ "nvidia" ];
+
+  # Disable garbage collection for now
+  nix.gc.automatic = lib.mkForce false;

  services.openssh.settings.X11Forwarding = true;

--- a/m/tent/configuration.nix
+++ b/m/tent/configuration.nix
@@ -33,9 +33,6 @@
    nameservers = [ "84.88.52.35" "84.88.52.36" ];
    search = [ "bsc.es" "ac.upc.edu" ];
    defaultGateway = "10.0.44.1";
-    hosts = {
-      "84.88.53.236" = [ "apex" ];
-    };
  };

  services.p.enable = true;
--- a/m/weasel/configuration.nix
+++ b/m/weasel/configuration.nix
@@ -1,32 +0,0 @@
-{ lib, ... }:
-
-{
-  imports = [
-    ../common/ssf.nix
-  ];
-
-  # Select this using the ID to avoid mismatches
-  boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";
-
-  # No swap, there is plenty of RAM
-  swapDevices = lib.mkForce [];
-
-  # Users with sudo access
-  users.groups.wheel.members = [ "abonerib" "anavarro" ];
-
-  # Run julia installed with juliaup using julia's own libraries:
-  # NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
-  programs.nix-ld.enable = true;
-
-  networking = {
-    hostName = "weasel";
-    interfaces.eno1.ipv4.addresses = [ {
-      address = "10.0.40.6";
-      prefixLength = 24;
-    } ];
-    interfaces.ibp5s0.ipv4.addresses = [ {
-      address = "10.0.42.6";
-      prefixLength = 24;
-    } ];
-  };
-}
--- a/pkgs/amd-uprof/default.nix
+++ b/pkgs/amd-uprof/default.nix
@@ -1,82 +0,0 @@
-{ stdenv
-, lib
-, curl
-, cacert
-, runCommandLocal
-, autoPatchelfHook
-, elfutils
-, glib
-, libGL
-, ncurses5
-, xorg
-, zlib
-, libxkbcommon
-, freetype
-, fontconfig
-, libGLU
-, dbus
-, rocmPackages
-, libxcrypt-legacy
-}:
-
-let
-  version = "5.1.701";
-  tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";
-
-  uprofSrc = runCommandLocal tarball {
-    nativeBuildInputs = [ curl ];
-    outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
-    SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
-  } ''
-    curl \
-    -o $out \
-    'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
-    -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
-    -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
-    -H 'Accept-Language: en-US,en;q=0.5' \
-    -H 'Accept-Encoding: gzip, deflate, br, zstd' \
-    -H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
-  '';
-
-in
-  stdenv.mkDerivation {
-    pname = "AMD-uProf";
-    inherit version;
-    src = uprofSrc;
-    dontStrip = true;
-    phases = [ "installPhase" "fixupPhase" ];
-    nativeBuildInputs = [ autoPatchelfHook ];
-    buildInputs = [
-      stdenv.cc.cc.lib
-      ncurses5
-      elfutils
-      glib
-      libGL
-      libGLU
-      libxcrypt-legacy
-      xorg.libX11
-      xorg.libXext
-      xorg.libXi
-      xorg.libXmu
-      xorg.libxcb
-      xorg.xcbutilwm
-      xorg.xcbutilrenderutil
-      xorg.xcbutilkeysyms
-      xorg.xcbutilimage
-      fontconfig.lib
-      libxkbcommon
-      zlib
-      freetype
-      dbus
-      rocmPackages.rocprofiler
-    ];
-    installPhase = ''
-      set -x
-      mkdir -p $out
-      tar -x -v -C $out --strip-components=1 -f $src
-      rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
-      patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
-      patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
-      set +x
-    '';
-  }
--- a/pkgs/amd-uprof/driver.nix
+++ b/pkgs/amd-uprof/driver.nix
@@ -1,35 +0,0 @@
-{ stdenv
-, lib
-, amd-uprof
-, curl
-, cacert
-, kernel
-, runCommandLocal
-}:
-
-let
-  version = amd-uprof.version;
-  tarball = amd-uprof.src;
-in stdenv.mkDerivation {
-  pname = "AMDPowerProfilerDriver";
-  inherit version;
-  src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
-    set -x
-    tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz
-    mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out
-    set +x
-  '';
-  hardeningDisable = [ "pic" "format" ];
-  nativeBuildInputs = kernel.moduleBuildDependencies;
-  patches = [ ./makefile.patch ./hrtimer.patch ];
-  makeFlags = [
-    "KERNEL_VERSION=${kernel.modDirVersion}"
-    "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
-    "INSTALL_MOD_PATH=$(out)"
-  ];
-  meta = {
-    description = "AMD Power Profiler Driver";
-    homepage = "https://www.amd.com/es/developer/uprof.html";
-    platforms = lib.platforms.linux;
-  };
-}
--- a/pkgs/amd-uprof/hrtimer.patch
+++ b/pkgs/amd-uprof/hrtimer.patch
@@ -1,31 +0,0 @@
--- a/src/PmcTimerConfig.c	2025-09-04 12:17:16.771707049 +0200
-+++ b/src/PmcTimerConfig.c	2025-09-04 12:17:04.878515468 +0200
-@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
- 
-     DRVPRINT("pTimerConfig(%p)", pTimerConfig);
- 
-    hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
-+    hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- }
- 
- int PmcSetupTimer(ClientContext* pClientCtx)
-@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
-                 {
-                     /* Interval in ms */
-                     pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
-                    pTimerConfig->m_hrTimer.function = PmcTimerCallback;
- 
-                     DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
-                 }
--- a/src/PwrProfTimer.c	2025-09-04 12:18:08.750544327 +0200
-+++ b/src/PwrProfTimer.c	2025-09-04 12:18:28.557863382 +0200
-@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
-     pCoreClientData = &per_cpu(g_coreClientData, cpu);
- 
-     // initialize HR timer
-    hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
-    pCoreClientData->m_hrTimer.function = &HrTimerCallback;
-+    hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- 
-     return;
- } // InitHrTimer
--- a/pkgs/amd-uprof/makefile.patch
+++ b/pkgs/amd-uprof/makefile.patch
@@ -1,66 +0,0 @@
--- a/Makefile	2025-06-19 20:36:49.346693267 +0200
-+++ b/Makefile	2025-06-19 20:42:29.778088660 +0200
-@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
- MODULE_NAME_KO=$(MODULE_NAME).ko
- 
- # check is module inserted
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
-+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
- 
- # check pcore dkms status
- PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
-@@ -50,7 +50,7 @@ endif
- # “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
- G_VERSION=9
- K_VERSION=5
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
-+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
- GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
- ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
- 	ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
-@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs :=  src/PmcDataBuffe
- 
- # make
- all:
-	@chmod a+x ./AMDPPcert.sh
-	@./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
-	@SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
-                if [ $$SIGSTATUS1 -eq 1 ]; then \
-			exit 1; \
-		fi
-	@make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
-	@SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
-                if [ $$SIGSTATUS3 -eq 0 ]; then \
-			./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
-		fi
-+	make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
- 
- # make clean
- clean:
-@@ -84,23 +74,9 @@ clean:
- 
- # make install
- install:
-	@mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
-	@rm  -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
-	@cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
-	@depmod -a
-	@if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
-		echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
-		rmmod $(MODULE_NAME);\
-	fi
-	@modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
-		cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
-		echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
-                if [ $$SIGSTATUS1 -eq 0 ]; then \
-			echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
-			exit 1; \
-		else \
-			cat $(PWD)/sign_status1; \
-		fi
-+	mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
-+	cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
-+
- # make dkms
- dkms:
- 	@chmod a+x ./AMDPPcert.sh
--- a/pkgs/cudainfo/Makefile
+++ b/pkgs/cudainfo/Makefile
@@ -1,12 +0,0 @@
-HOSTCXX  ?= g++
-NVCC     := nvcc -ccbin $(HOSTCXX)
-CXXFLAGS := -m64
-
-# Target rules
-all: cudainfo
-
-cudainfo: cudainfo.cpp
-	$(NVCC) $(CXXFLAGS) -o $@ $<
-
-clean:
-	rm -f cudainfo cudainfo.o
--- a/pkgs/cudainfo/cudainfo.cpp
+++ b/pkgs/cudainfo/cudainfo.cpp
@@ -1,600 +0,0 @@
-/*
- * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
-
-// Shared Utilities (QA Testing)
-
-// std::system includes
-#include <memory>
-#include <iostream>
-
-#include <cuda_runtime.h>
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
-
-// CUDA Runtime error messages
-#ifdef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error)
-{
-    switch (error)
-    {
-        case cudaSuccess:
-            return "cudaSuccess";
-
-        case cudaErrorMissingConfiguration:
-            return "cudaErrorMissingConfiguration";
-
-        case cudaErrorMemoryAllocation:
-            return "cudaErrorMemoryAllocation";
-
-        case cudaErrorInitializationError:
-            return "cudaErrorInitializationError";
-
-        case cudaErrorLaunchFailure:
-            return "cudaErrorLaunchFailure";
-
-        case cudaErrorPriorLaunchFailure:
-            return "cudaErrorPriorLaunchFailure";
-
-        case cudaErrorLaunchTimeout:
-            return "cudaErrorLaunchTimeout";
-
-        case cudaErrorLaunchOutOfResources:
-            return "cudaErrorLaunchOutOfResources";
-
-        case cudaErrorInvalidDeviceFunction:
-            return "cudaErrorInvalidDeviceFunction";
-
-        case cudaErrorInvalidConfiguration:
-            return "cudaErrorInvalidConfiguration";
-
-        case cudaErrorInvalidDevice:
-            return "cudaErrorInvalidDevice";
-
-        case cudaErrorInvalidValue:
-            return "cudaErrorInvalidValue";
-
-        case cudaErrorInvalidPitchValue:
-            return "cudaErrorInvalidPitchValue";
-
-        case cudaErrorInvalidSymbol:
-            return "cudaErrorInvalidSymbol";
-
-        case cudaErrorMapBufferObjectFailed:
-            return "cudaErrorMapBufferObjectFailed";
-
-        case cudaErrorUnmapBufferObjectFailed:
-            return "cudaErrorUnmapBufferObjectFailed";
-
-        case cudaErrorInvalidHostPointer:
-            return "cudaErrorInvalidHostPointer";
-
-        case cudaErrorInvalidDevicePointer:
-            return "cudaErrorInvalidDevicePointer";
-
-        case cudaErrorInvalidTexture:
-            return "cudaErrorInvalidTexture";
-
-        case cudaErrorInvalidTextureBinding:
-            return "cudaErrorInvalidTextureBinding";
-
-        case cudaErrorInvalidChannelDescriptor:
-            return "cudaErrorInvalidChannelDescriptor";
-
-        case cudaErrorInvalidMemcpyDirection:
-            return "cudaErrorInvalidMemcpyDirection";
-
-        case cudaErrorAddressOfConstant:
-            return "cudaErrorAddressOfConstant";
-
-        case cudaErrorTextureFetchFailed:
-            return "cudaErrorTextureFetchFailed";
-
-        case cudaErrorTextureNotBound:
-            return "cudaErrorTextureNotBound";
-
-        case cudaErrorSynchronizationError:
-            return "cudaErrorSynchronizationError";
-
-        case cudaErrorInvalidFilterSetting:
-            return "cudaErrorInvalidFilterSetting";
-
-        case cudaErrorInvalidNormSetting:
-            return "cudaErrorInvalidNormSetting";
-
-        case cudaErrorMixedDeviceExecution:
-            return "cudaErrorMixedDeviceExecution";
-
-        case cudaErrorCudartUnloading:
-            return "cudaErrorCudartUnloading";
-
-        case cudaErrorUnknown:
-            return "cudaErrorUnknown";
-
-        case cudaErrorNotYetImplemented:
-            return "cudaErrorNotYetImplemented";
-
-        case cudaErrorMemoryValueTooLarge:
-            return "cudaErrorMemoryValueTooLarge";
-
-        case cudaErrorInvalidResourceHandle:
-            return "cudaErrorInvalidResourceHandle";
-
-        case cudaErrorNotReady:
-            return "cudaErrorNotReady";
-
-        case cudaErrorInsufficientDriver:
-            return "cudaErrorInsufficientDriver";
-
-        case cudaErrorSetOnActiveProcess:
-            return "cudaErrorSetOnActiveProcess";
-
-        case cudaErrorInvalidSurface:
-            return "cudaErrorInvalidSurface";
-
-        case cudaErrorNoDevice:
-            return "cudaErrorNoDevice";
-
-        case cudaErrorECCUncorrectable:
-            return "cudaErrorECCUncorrectable";
-
-        case cudaErrorSharedObjectSymbolNotFound:
-            return "cudaErrorSharedObjectSymbolNotFound";
-
-        case cudaErrorSharedObjectInitFailed:
-            return "cudaErrorSharedObjectInitFailed";
-
-        case cudaErrorUnsupportedLimit:
-            return "cudaErrorUnsupportedLimit";
-
-        case cudaErrorDuplicateVariableName:
-            return "cudaErrorDuplicateVariableName";
-
-        case cudaErrorDuplicateTextureName:
-            return "cudaErrorDuplicateTextureName";
-
-        case cudaErrorDuplicateSurfaceName:
-            return "cudaErrorDuplicateSurfaceName";
-
-        case cudaErrorDevicesUnavailable:
-            return "cudaErrorDevicesUnavailable";
-
-        case cudaErrorInvalidKernelImage:
-            return "cudaErrorInvalidKernelImage";
-
-        case cudaErrorNoKernelImageForDevice:
-            return "cudaErrorNoKernelImageForDevice";
-
-        case cudaErrorIncompatibleDriverContext:
-            return "cudaErrorIncompatibleDriverContext";
-
-        case cudaErrorPeerAccessAlreadyEnabled:
-            return "cudaErrorPeerAccessAlreadyEnabled";
-
-        case cudaErrorPeerAccessNotEnabled:
-            return "cudaErrorPeerAccessNotEnabled";
-
-        case cudaErrorDeviceAlreadyInUse:
-            return "cudaErrorDeviceAlreadyInUse";
-
-        case cudaErrorProfilerDisabled:
-            return "cudaErrorProfilerDisabled";
-
-        case cudaErrorProfilerNotInitialized:
-            return "cudaErrorProfilerNotInitialized";
-
-        case cudaErrorProfilerAlreadyStarted:
-            return "cudaErrorProfilerAlreadyStarted";
-
-        case cudaErrorProfilerAlreadyStopped:
-            return "cudaErrorProfilerAlreadyStopped";
-
-        /* Since CUDA 4.0*/
-        case cudaErrorAssert:
-            return "cudaErrorAssert";
-
-        case cudaErrorTooManyPeers:
-            return "cudaErrorTooManyPeers";
-
-        case cudaErrorHostMemoryAlreadyRegistered:
-            return "cudaErrorHostMemoryAlreadyRegistered";
-
-        case cudaErrorHostMemoryNotRegistered:
-            return "cudaErrorHostMemoryNotRegistered";
-
-        /* Since CUDA 5.0 */
-        case cudaErrorOperatingSystem:
-            return "cudaErrorOperatingSystem";
-
-        case cudaErrorPeerAccessUnsupported:
-            return "cudaErrorPeerAccessUnsupported";
-
-        case cudaErrorLaunchMaxDepthExceeded:
-            return "cudaErrorLaunchMaxDepthExceeded";
-
-        case cudaErrorLaunchFileScopedTex:
-            return "cudaErrorLaunchFileScopedTex";
-
-        case cudaErrorLaunchFileScopedSurf:
-            return "cudaErrorLaunchFileScopedSurf";
-
-        case cudaErrorSyncDepthExceeded:
-            return "cudaErrorSyncDepthExceeded";
-
-        case cudaErrorLaunchPendingCountExceeded:
-            return "cudaErrorLaunchPendingCountExceeded";
-
-        case cudaErrorNotPermitted:
-            return "cudaErrorNotPermitted";
-
-        case cudaErrorNotSupported:
-            return "cudaErrorNotSupported";
-
-        /* Since CUDA 6.0 */
-        case cudaErrorHardwareStackError:
-            return "cudaErrorHardwareStackError";
-
-        case cudaErrorIllegalInstruction:
-            return "cudaErrorIllegalInstruction";
-
-        case cudaErrorMisalignedAddress:
-            return "cudaErrorMisalignedAddress";
-
-        case cudaErrorInvalidAddressSpace:
-            return "cudaErrorInvalidAddressSpace";
-
-        case cudaErrorInvalidPc:
-            return "cudaErrorInvalidPc";
-
-        case cudaErrorIllegalAddress:
-            return "cudaErrorIllegalAddress";
-
-        /* Since CUDA 6.5*/
-        case cudaErrorInvalidPtx:
-            return "cudaErrorInvalidPtx";
-
-        case cudaErrorInvalidGraphicsContext:
-            return "cudaErrorInvalidGraphicsContext";
-
-        case cudaErrorStartupFailure:
-            return "cudaErrorStartupFailure";
-
-        case cudaErrorApiFailureBase:
-            return "cudaErrorApiFailureBase";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-template< typename T >
-void check(T result, char const *const func, const char *const file, int const line)
-{
-    if (result)
-    {
-        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
-                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
-        cudaDeviceReset();
-        // Make sure we call CUDA Device Reset before exiting
-        exit(EXIT_FAILURE);
-    }
-}
-
-int *pArgc = NULL;
-char **pArgv = NULL;
-
-#if CUDART_VERSION < 5000
-
-// CUDA-C includes
-#include <cuda.h>
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
-{
-    CUresult error =    cuDeviceGetAttribute(attribute, device_attribute, device);
-
-    if (CUDA_SUCCESS != error) {
-        fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
-                error, __FILE__, __LINE__);
-
-        // cudaDeviceReset causes the driver to clean up all state. While
-        // not mandatory in normal operation, it is good practice.  It is also
-        // needed to ensure correct operation when the application is being
-        // profiled. Calling cudaDeviceReset causes all profile data to be
-        // flushed before the application exits
-        cudaDeviceReset();
-        exit(EXIT_FAILURE);
-    }
-}
-
-#endif /* CUDART_VERSION < 5000 */
-
-// Beginning of GPU Architecture definitions
-inline int ConvertSMVer2Cores(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct {
-        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] = {
-        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
-        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
-        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
-        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
-        {   -1, -1 }
-    };
-
-    int index = 0;
-
-    while (nGpuArchCoresPerSM[index].SM != -1) {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-
-        index++;
-    }
-
-    // If we don't find the values, we default use the previous one to run properly
-    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
-    return nGpuArchCoresPerSM[index-1].Cores;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Program main
-////////////////////////////////////////////////////////////////////////////////
-int
-main(int argc, char **argv)
-{
-    pArgc = &argc;
-    pArgv = argv;
-
-    printf("%s Starting...\n\n", argv[0]);
-    printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
-
-    int deviceCount = 0;
-    cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
-
-    if (error_id != cudaSuccess) {
-        printf("cudaGetDeviceCount failed: %s (%d)\n",
-			cudaGetErrorString(error_id), (int) error_id);
-        printf("Result = FAIL\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // This function call returns 0 if there are no CUDA capable devices.
-    if (deviceCount == 0)
-        printf("There are no available device(s) that support CUDA\n");
-    else
-        printf("Detected %d CUDA Capable device(s)\n", deviceCount);
-
-    int dev, driverVersion = 0, runtimeVersion = 0;
-
-    for (dev = 0; dev < deviceCount; ++dev) {
-        cudaSetDevice(dev);
-        cudaDeviceProp deviceProp;
-        cudaGetDeviceProperties(&deviceProp, dev);
-
-        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
-
-        // Console log
-        cudaDriverGetVersion(&driverVersion);
-        cudaRuntimeGetVersion(&runtimeVersion);
-        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
-        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);
-
-        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n",
-                (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
-
-        printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
-               deviceProp.multiProcessorCount,
-               ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
-               ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
-        printf("  GPU Max Clock rate:                            %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
-
-
-#if CUDART_VERSION >= 5000
-        // This is supported in CUDA 5.0 (runtime API device properties)
-        printf("  Memory Clock rate:                             %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
-        printf("  Memory Bus Width:                              %d-bit\n",   deviceProp.memoryBusWidth);
-
-        if (deviceProp.l2CacheSize) {
-            printf("  L2 Cache Size:                                 %d bytes\n", deviceProp.l2CacheSize);
-        }
-
-#else
-        // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
-        int memoryClock;
-        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
-        printf("  Memory Clock rate:                             %.0f Mhz\n", memoryClock * 1e-3f);
-        int memBusWidth;
-        getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
-        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
-        int L2CacheSize;
-        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
-
-        if (L2CacheSize) {
-            printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
-        }
-
-#endif
-
-        printf("  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
-               deviceProp.maxTexture1D   , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
-               deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
-        printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
-               deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
-        printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n",
-               deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
-
-
-        printf("  Total amount of constant memory:               %lu bytes\n", deviceProp.totalConstMem);
-        printf("  Total amount of shared memory per block:       %lu bytes\n", deviceProp.sharedMemPerBlock);
-        printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
-        printf("  Warp size:                                     %d\n", deviceProp.warpSize);
-        printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor);
-        printf("  Maximum number of threads per block:           %d\n", deviceProp.maxThreadsPerBlock);
-        printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
-               deviceProp.maxThreadsDim[0],
-               deviceProp.maxThreadsDim[1],
-               deviceProp.maxThreadsDim[2]);
-        printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",
-               deviceProp.maxGridSize[0],
-               deviceProp.maxGridSize[1],
-               deviceProp.maxGridSize[2]);
-        printf("  Maximum memory pitch:                          %lu bytes\n", deviceProp.memPitch);
-        printf("  Texture alignment:                             %lu bytes\n", deviceProp.textureAlignment);
-        printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
-        printf("  Run time limit on kernels:                     %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
-        printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No");
-        printf("  Support host page-locked memory mapping:       %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
-        printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
-        printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
-#endif
-        printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
-        printf("  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
-
-        const char *sComputeMode[] = {
-            "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
-            "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
-            "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
-            "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
-            "Unknown",
-            NULL
-        };
-        printf("  Compute Mode:\n");
-        printf("     < %s >\n", sComputeMode[deviceProp.computeMode]);
-    }
-
-    // If there are 2 or more GPUs, query to determine whether RDMA is supported
-    if (deviceCount >= 2)
-    {
-        cudaDeviceProp prop[64];
-        int gpuid[64]; // we want to find the first two GPU's that can support P2P
-        int gpu_p2p_count = 0;
-
-        for (int i=0; i < deviceCount; i++)
-        {
-            checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
-
-            // Only boards based on Fermi or later can support P2P
-            if ((prop[i].major >= 2)
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-                // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
-                && prop[i].tccDriver
-#endif
-               )
-            {
-                // This is an array of P2P capable GPUs
-                gpuid[gpu_p2p_count++] = i;
-            }
-        }
-
-        // Show all the combinations of support P2P GPUs
-        int can_access_peer_0_1, can_access_peer_1_0;
-
-        if (gpu_p2p_count >= 2)
-        {
-            for (int i = 0; i < gpu_p2p_count-1; i++)
-            {
-                for (int j = 1; j < gpu_p2p_count; j++)
-                {
-                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
-                    printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
-                           prop[gpuid[j]].name, gpuid[j] ,
-                           can_access_peer_0_1 ? "Yes" : "No");
-                }
-            }
-
-            for (int j = 1; j < gpu_p2p_count; j++)
-            {
-                for (int i = 0; i < gpu_p2p_count-1; i++)
-                {
-                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
-                    printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
-                           prop[gpuid[i]].name, gpuid[i] ,
-                           can_access_peer_1_0 ? "Yes" : "No");
-                }
-            }
-        }
-    }
-
-    // csv masterlog info
-    // *****************************
-    // exe and CUDA driver name
-    printf("\n");
-    std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
-    char cTemp[128];
-
-    // driver version
-    sProfileString += ", CUDA Driver Version = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
-#else
-    sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
-#endif
-    sProfileString +=  cTemp;
-
-    // Runtime version
-    sProfileString += ", CUDA Runtime Version = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
-#else
-    sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
-#endif
-    sProfileString +=  cTemp;
-
-    // Device count
-    sProfileString += ", NumDevs = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d", deviceCount);
-#else
-    sprintf(cTemp, "%d", deviceCount);
-#endif
-    sProfileString += cTemp;
-
-    // Print Out all device Names
-    for (dev = 0; dev < deviceCount; ++dev)
-    {
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        sprintf_s(cTemp, 13, ", Device%d = ", dev);
-#else
-        sprintf(cTemp, ", Device%d = ", dev);
-#endif
-        cudaDeviceProp deviceProp;
-        cudaGetDeviceProperties(&deviceProp, dev);
-        sProfileString += cTemp;
-        sProfileString += deviceProp.name;
-    }
-
-    sProfileString += "\n";
-    printf("%s", sProfileString.c_str());
-
-    printf("Result = PASS\n");
-
-    // finish
-    // cudaDeviceReset causes the driver to clean up all state. While
-    // not mandatory in normal operation, it is good practice.  It is also
-    // needed to ensure correct operation when the application is being
-    // profiled. Calling cudaDeviceReset causes all profile data to be
-    // flushed before the application exits
-    cudaDeviceReset();
-    return 0;
-}
--- a/pkgs/cudainfo/default.nix
+++ b/pkgs/cudainfo/default.nix
@@ -1,43 +0,0 @@
-{
-  stdenv
-, cudatoolkit
-, cudaPackages
-, autoAddDriverRunpath
-, strace
-}:
-
-stdenv.mkDerivation (finalAttrs: {
-  name = "cudainfo";
-  src = ./.;
-  buildInputs = [
-    cudatoolkit # Required for nvcc
-    cudaPackages.cuda_cudart.static # Required for -lcudart_static
-    autoAddDriverRunpath
-  ];
-  installPhase = ''
-    mkdir -p $out/bin
-    cp -a cudainfo $out/bin
-  '';
-  passthru.gpuCheck = stdenv.mkDerivation {
-    name = "cudainfo-test";
-    requiredSystemFeatures = [ "cuda" ];
-    dontBuild = true;
-    nativeCheckInputs = [
-      finalAttrs.finalPackage # The cudainfo package from above
-      strace # When it fails, it will show the trace
-    ];
-    dontUnpack = true;
-    doCheck = true;
-    checkPhase = ''
-      if ! cudainfo; then
-        set -x
-        cudainfo=$(command -v cudainfo)
-        ldd $cudainfo
-        readelf -d $cudainfo
-        strace -f $cudainfo
-        set +x
-      fi
-    '';
-    installPhase = "touch $out";
-  };
-})
--- a/pkgs/mpich-fix-hwtopo.patch
+++ b/pkgs/mpich-fix-hwtopo.patch
@@ -0,0 +1,36 @@
+diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
+index 33e88bc..ee3641c 100644
+--- a/src/util/mpir_hwtopo.c
+++ b/src/util/mpir_hwtopo.c
+@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
+ #ifdef HAVE_HWLOC
+     bindset = hwloc_bitmap_alloc();
+     hwloc_topology_init(&hwloc_topology);
+-    char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
+-    if (xmlfile != NULL) {
+-        int rc;
+-        rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
+-        if (rc == 0) {
+-            /* To have hwloc still actually call OS-specific hooks, the
+-             * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+-             * file is really the underlying system. */
+-            hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
+-        }
+-        MPL_free(xmlfile);
+-    }
+
+     hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
+     if (!hwloc_topology_load(hwloc_topology)) 
+
+--- a/src/mpi/init/local_proc_attrs.c
+++ b/src/mpi/init/local_proc_attrs.c
+@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
+     /* Set the number of tag bits. The device may override this value. */
+     MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
+
+-    char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
+-    MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
+-    MPL_free(requested_kinds);
+-
+     return mpi_errno;
+ }
--- a/pkgs/overlay.nix
+++ b/pkgs/overlay.nix
@@ -11,6 +11,10 @@ final: prev:
      paths = [ pmix.dev pmix.out ];
    };
  in prev.mpich.overrideAttrs (old: {
+    patches = (old.patches or []) ++ [
+      # See https://github.com/pmodels/mpich/issues/6946
+      ./mpich-fix-hwtopo.patch
+    ];
    buildInput = old.buildInputs ++ [
      libfabric
      pmixAll
@@ -52,16 +56,4 @@ final: prev:
  prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
  meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
  upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
-  cudainfo = prev.callPackage ./cudainfo/default.nix { };
-
-  amd-uprof = prev.callPackage ./amd-uprof/default.nix { };
-
-  # FIXME: Extend this to all linuxPackages variants. Open problem, see:
-  # https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
-  linuxPackages = prev.linuxPackages.extend (_final: _prev: {
-    amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
-  });
-  linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
-    amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
-  });
 }
--- a/secrets/ceph-user.age
+++ b/secrets/ceph-user.age
@@ -1,25 +1,19 @@
 age-encryption.org/v1
-> ssh-ed25519 AY8zKw /gmhFOFqOs8IobAImvQVKeM5Y6k0FpuR61/Cu5drVVI
-g9FXJg2oIoien0zJ70FWHwSTM8SBwbpS188S3Swj7EM
-> ssh-ed25519 sgAamA opPjlWPhSiI0Rd5l7kd204S5FXFLcQcQftyKb7MDmnU
-3XrRDVnglCP+vBwvfd1rP5gHttsGDHyXwbf10a8/kKY
-> ssh-ed25519 HY2yRg QKZbubM76C3tobPoyCFDRclA9Pzb2fC7s4WOoIgdORc
-K5kckU0KhQFTE6SikJXFJgM41Tco5+VqOsaG0qLrY1Q
-> ssh-ed25519 fw2Xhg +ohqts8dLFjvdHxrGHcOGxU0dm+V3N//giljHkobpDM
-jR/UzGrfS9lrJ/VeolKLxfzeJAf2fIB2pdIn/6ukqNk
-> ssh-ed25519 tcumPQ 3DPkDPIQQSVtXSLzIRETsIyXQ0k1o18Evn6vf+l/6R8
-bLXF62OmJjnOT1vvgq3+AcOKKSG5NonrK5EqCVc0Mwo
-> ssh-ed25519 JJ1LWg 2Wefc7eLolMU5InEmCNTq21Mf71mI0a2N1HgDrlHvy4
-qXFW9CQBnrzubZ0mzS0Io2WGRrwGBkmeYndBTcZn/fM
-> ssh-ed25519 cDBabA oiH36AoIt/fFFYgnoxtH7OoetP+2/wjtn8qo3RJDSHc
-qKmkxy1aZGP4ZwC0iH7n7hiJ0+rFQYvjQb5O1a1Z0r4
-> ssh-ed25519 cK5kHw bX3RtO5StMejUYWAaA37fjHA5nO7Xs1vWDQk3yOjs2o
-Egxmcf8FKAd+E5hMLmhV1yQsCo5rJyUazf1szOvpTAM
-> ssh-ed25519 CAWG4Q oKqqRDJH0w8lsoQBQk0w8PO+z5gFNmSaGBUSumvDp1I
-m1zWp9MfViAmtpbJhqOHraIokDaPKb0DvvO4vAGCTWI
-> ssh-ed25519 xA739A G26kPOz6sbFATs+KAr7gbDvji13eA1smFusQAOJXMwA
-Sppvz7A103kZoNxoGsd6eXeCvVh7mBE2MRwLFj9O1dY
-> ssh-ed25519 MSF3dg 55ekNcp+inbUd+GQ/VZ7BoBASaJ8YDqF74CVXy1PUxQ
-aTHLLAbzQPWWld/OT3BKebc6FcmsqMTaWCPBGm1UHic
--- mVkAMnI9XQhS3fMiFuuXP/yLR9wEG9+Rr8pA4Uc0avY
-<04>DU <20><>s<EFBFBD><73><EFBFBD><EFBFBD>j<EFBFBD><6A>M<EFBFBD><4D>$<24>[<5B>M<EFBFBD><4D><EFBFBD><03>[_<>K7s<37>ju<>v<EFBFBD>D<EFBFBD>4<EFBFBD>g<EFBFBD><67>܄3<>Gn<47><6E><EFBFBD> ɽ<>P<EFBFBD>7~rZs<><73>
+-> ssh-ed25519 AY8zKw xeyzSqfio6SMS9SqywR+7II80D12Oha9T5zOgAIABSQ
+ST26VaF2G1xv9l7d3jWKG32ssOivfwx+p9jLLV7ZFnU
+-> ssh-ed25519 sgAamA HrRx+x7NjXKVDaealWFo+Q8zMAdzoj6nTBxw0KMi3jE
+nlcEVTDTe1mPeS16/t9GYRnSSkm5EjpeiBZPIC/2f8U
+-> ssh-ed25519 HY2yRg NDp5vUeX35rDV78DFQi9fsc71pQNVE8YQ1StCp+YjTg
+MdUAWHd1k6Jed2pp7Wct/DgF6ShqXFwNxPaXeBOLAcs
+-> ssh-ed25519 tcumPQ d0zVVB8t7W9KUapOsnsrvpAj7LgM9zS0yCv8SQnF0g8
+aAPaWRTEBEQgmCkRG69NuWZ/lEva7vH+L8ifQSE0Z1I
+-> ssh-ed25519 JJ1LWg 4l8GZNdGOSbqKvmKq1q1aPvjeQIwpgbJj4DBYBse7x4
+rNhTiZlwzyOiCLzYRSzJ5AHebbv94dOgl1UyNmDJD8A
+-> ssh-ed25519 CAWG4Q vGhwJDLJIAU8BpV6GP8Dnz2pvTAMufY4v4nvrr2O9yw
+hNZZFDYUMPQNM5+qcc5arIgqQw0PXuqq1WWDTpE+EHo
+-> ssh-ed25519 xA739A 8eEi9S5dMWPVR4fKVZdV5eHBOJVf2Ap+3qHSYtYHYgc
+GcgzvJiqsNyZTVk12Z0FEnqB4LgfQ1xjKQwXdto1Hjs
+-> ssh-ed25519 MSF3dg oUY9IjDR6hi1qbrCV5z5IcYj85cMppxO94iqkD60Eww
+cBzFGrhh+kWjIi0llw2RqACU1pa7XT9kqWkSeAY8VGI
+--- q7AaMOj7ZaS+Mf6trWK56o/1q/c2urrQBPAqk4PtATA
+~<7E><>k-/<2F>Xw<58><77><1D>V<EFBFBD>(<18><>Z<EFBFBD>d\<5C><>t<EFBFBD>'q<><71><EFBFBD>3<EFBFBD><33>R<EFBFBD>a\yFW<46>
--- a/secrets/gitea-runner-token.age
+++ b/secrets/gitea-runner-token.age
@@ -1,13 +1,11 @@
 age-encryption.org/v1
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
-DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
-hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
-hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
-eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
-/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
-<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>%	<09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG<0B><><EFBFBD>
+-> ssh-ed25519 HY2yRg WUMWvyagPalsy7u1RaEFAwJvFowso1/quNBo+nAkxhQ
+OHcebB7koPKhy58A6qngEVNWckkWChyEK3dwgy8EL5o
+-> ssh-ed25519 CAWG4Q Yx/HLIryUNE2BaqTl84FrNRy4XLCY2TRkRgbA9k3qU4
+LZljfuLS5yMVVK6N57iC6cKEaFP6Hh2OkvWJjuFg8q0
+-> ssh-ed25519 xA739A DOXjPRttSWz51Sr7KfjgKfAtaIYMo3foB1Ywqw9HYDY
+CA5puXK/1HDOitA2XHBI3OdKmZ7BzHst4DyuWGMC6hE
+-> ssh-ed25519 MSF3dg +2LetdIiIZUk7wtHNS1tYsLo4ypwqZ9gpg77RQrnzHU
+yIUu8BVbF3dhUx3531RR50/cJQd9gd8VfKUQzEeT/iQ
+--- oY/wQ+RjZO2CmKZtbQ0yOVZ5fv2+AlvvkRu1UDfCNAA
+_8`G<>=C7@x&<26><>\Ft<46>)<29><><EFBFBD><EFBFBD><EFBFBD>cPe<50><65>%<25>ֽ[zX-0<>[<11><><EFBFBD>ɲ<><C9B2>tz<74><7A>;%<25><><EFBFBD><EFBFBD><EFBFBD>~<7E>H0<48>؃*XD<58>;<3B><>
--- a/secrets/gitlab-bsc-docker-token.age
+++ b/secrets/gitlab-bsc-docker-token.age
--- a/secrets/gitlab-runner-docker-token.age
+++ b/secrets/gitlab-runner-docker-token.age
@@ -1,13 +1,11 @@
 age-encryption.org/v1
-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk
-6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM
-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY
-6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M
-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc
-mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY
-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M
-KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc
-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc
-wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U
--- 1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0
-<EFBFBD><EFBFBD><EFBFBD><EFBFBD>R<EFBFBD>@<40>/i<>I'<27><><EFBFBD>Nx<4E>r"<1D>`<1E>O<EFBFBD><4F><EFBFBD>y<><79>8<EFBFBD><38> \/<2F><>I<19><17>D<EFBFBD>`<60>ߓ<EFBFBD><DF93><EFBFBD><1E><04>uy<75><79><EFBFBD>:9Lt<4C><1D><><EFBFBD>؋<EFBFBD><D88B><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AU<41><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>`<60>;<3B>q8<71>GLU#<23>i<EFBFBD>y<EFBFBD><79>i<03>ڜ
+-> ssh-ed25519 HY2yRg 6C5Cv7ILdBrpMkCTT/insUY0kyQWbfgU500Ai8ePOXY
+tMw6ehFrsq2dvDEXkLOJwrNZfI28trlr9uy3xW/fzpA
+-> ssh-ed25519 CAWG4Q x/j+364IYURgt7fhIPBzabbWMEg08nX8MRrJM/1Q6RU
+AL5Ut2rDr3UXcQXMZJ53ZMf5wMHmT83whx0ntJfW/WU
+-> ssh-ed25519 xA739A QjXftBsoGV1rVeHSKcsjp+HMpRVsaHOeeGdDcF6ZWg4
+ovVoYPaPn3liGPAxHWY37CBIUFjAXurv6jMWs2He3HQ
+-> ssh-ed25519 MSF3dg FG0CQOj9fRlneW5QrWiy5ksRpicUwHqX9QMpZWhDImw
+L20n1vZRepsRPT4xM6TO6PcI/MJxw4mBLUF0EPv9Uhs
+--- DEi7iuzkniq0JPatJ5f2KhrhxWid7ojHpvNfUCGxFtk
+<EFBFBD><EFBFBD>%	n<><6E>!;^Q<>rqG<71>:<3A>jC.8l<38>|<7C><>o<EFBFBD><1E><>$LYy<59>N<EFBFBD>b<EFBFBD><1E><>:<14>{<7B><><EFBFBD>fާxTS\<5C>t<04>U<EFBFBD><55>\F<>)%<25><><EFBFBD>KL<4B>㙇p<E39987>:><3E><><EFBFBD><EFBFBD>&<1B>)<0B>Q<EFBFBD>1<>H܃V<DC83>Sޑ<53>n<>
--- a/secrets/gitlab-runner-shell-token.age
+++ b/secrets/gitlab-runner-shell-token.age
--- a/secrets/ipmi.yml.age
+++ b/secrets/ipmi.yml.age
--- a/secrets/jungle-robot-password.age
+++ b/secrets/jungle-robot-password.age
--- a/secrets/munge-key.age
+++ b/secrets/munge-key.age
--- a/secrets/nix-serve.age
+++ b/secrets/nix-serve.age
--- a/secrets/secrets.nix
+++ b/secrets/secrets.nix
@@ -2,8 +2,6 @@ let
  keys = import ../keys.nix;
  adminsKeys = builtins.attrValues keys.admins;
  hut = [ keys.hosts.hut ] ++ adminsKeys;
-  fox = [ keys.hosts.fox ] ++ adminsKeys;
-  apex = [ keys.hosts.apex ] ++ adminsKeys;
  mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
  tent = [ keys.hosts.tent ] ++ adminsKeys;
  # Only expose ceph keys to safe nodes and admins
@@ -26,7 +24,4 @@ in

  "ceph-user.age".publicKeys = safe;
  "munge-key.age".publicKeys = safe;
-
-  "wg-fox.age".publicKeys = fox;
-  "wg-apex.age".publicKeys = apex;
 }
--- a/secrets/tent-gitlab-runner-bsc-docker-token.age
+++ b/secrets/tent-gitlab-runner-bsc-docker-token.age
--- a/secrets/tent-gitlab-runner-pm-docker-token.age
+++ b/secrets/tent-gitlab-runner-pm-docker-token.age
--- a/secrets/tent-gitlab-runner-pm-shell-token.age
+++ b/secrets/tent-gitlab-runner-pm-shell-token.age
@@ -1,13 +1,13 @@
 age-encryption.org/v1
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
-RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
-Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
-FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
-sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
-8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>
+-> ssh-ed25519 G5LX5w V9bHLoGuY4stRwbzVS9Qa0L9yoY+UoCoXc+dJJQW/Ag
+2ut9GfdJ3KBCqZRaloZCQsl8MLfaZAZxqj6JtPJzu2k
+-> ssh-ed25519 CAWG4Q OAqnIfMECpKglZ7aF9tv/PQinG1Ou2+IEZ+nf4dtQjg
+dANdMLe4iI0d6Xd/dIMpZK+mgw2+VmJFQScHaIxD7WI
+-> ssh-ed25519 xA739A nVNF4Y6VSa5PP6FFBJpVmoFYYseoFx5F2wJU+Pwk+Xk
+A5CiuTSNlX9Y76qhYgblBdJl3zPhtjWho2oL5/sIKu0
+-> ssh-ed25519 MSF3dg /WMsGnBGzquIMyw06gHKpSS4OUxheulT59kxi+/pxxU
+ppwcv7RLzUbQUM7j0Tb9rRVT9XyPMhqYr2fr4S0nTJY
+--- zOe0Ko0oxArbmxePMPDVAT0pDju7IeOAih7sNrDcoVs
+i<EFBFBD>k<EFBFBD>A
+hODV<44>w!<21><0C><>E݈<45><DD88>+<2B><>`<60><><EFBFBD><EFBFBD>C<><43>5<EFBFBD>L<EFBFBD>A<EFBFBD>t<1A>M^<01>E<<1B>HI<48>_<EFBFBD>nn<6E><6E><EFBFBD>o<EFBFBD>?<3F>j-<05>
+A<1B>nԔί<1B>>Z<><5A>z<EFBFBD><7A><EFBFBD>dT<64><54>b"<22>(@<40><>{_ځC
--- a/secrets/vpn-dac-client-key.age
+++ b/secrets/vpn-dac-client-key.age
--- a/secrets/vpn-dac-login.age
+++ b/secrets/vpn-dac-login.age
--- a/secrets/wg-apex.age
+++ b/secrets/wg-apex.age
--- a/secrets/wg-fox.age
+++ b/secrets/wg-fox.age
@@ -1,14 +0,0 @@
-age-encryption.org/v1
-> ssh-ed25519 cDBabA heyW9/cxgwFX9IexQIXjAQDWGQPNcMXcArQp2Rxsqx4
-o9MQ7EH8PDDjsJdpH9F3Xq2zUoaDAJQlfFmYucSFs6Y
-> ssh-ed25519 cK5kHw Sza4pos7K3qW3omEeyidI/jszJNf9smemSZnUJfCIww
-D6vazXki7hIYraIuSiGPS+FPbkFUwHhHWDf52OhEIMg
-> ssh-ed25519 CAWG4Q YexIHueOIMmIN8JIDyNUOKBkyz/k18HqV3hTXh48KlM
-xh8UJzzWT6ByN+Dpn4JrMNsjGC/uc/v6LynwjBDz9NQ
-> ssh-ed25519 xA739A KySG3TXdqfCMUkVEDGa74B0op745s3XGYxFLyAXSQAc
-5EI/yb5ctW9Qu18bHm3/sK97kwGcKzzmWvPSCWm89XA
-> ssh-ed25519 MSF3dg MNxnNj0fHmri8ophexXPNjRUBUWrzcuk5S1mucxUMTE
-GVFWXtISEU8ZmlwL4nh4weAgfGrt2GHX0DTzbpS6zg8
--- UdrqkYG2ZApAuwdZeNhC50NP2rkD/Ol6y8nJa4RHx7Y
-<EFBFBD>ܻ<EFBFBD>m(<28><><EFBFBD>><3E>H<48>Y87<><37>G<0F>+*<12><><EFBFBD><EFBFBD>9V<>.<2E><><EFBFBD><EFBFBD><03><><EFBFBD>p<EFBFBD>Oo<4F>=+哇<>P0<50><30>{<7B>)<29><17><><EFBFBD><EFBFBD>><3E>z3P^
-u
--- a/web/content/fox/_index.md
+++ b/web/content/fox/_index.md
@@ -21,28 +21,17 @@ the detailed specifications:

 ## Access

-To access the machine, request a SLURM session from [apex](/apex) using the `fox`
-partition. If you need the machine for performance measurements, use an
-exclusive reservation:
+To access the machine, request a SLURM session from [hut](/hut) using the `fox`
+partition:

-    apex% salloc -p fox --exclusive
+    hut% salloc -p fox

-Otherwise, specify the CPUs that you need so other users can also use the node
-at the same time:
+Then connect via ssh:

-    apex% salloc -p fox -c 8
-
-Then use srun to execute an interactive shell:
-
-    apex% srun --pty $SHELL
+    hut% ssh fox
    fox%

-Make sure you get all CPUs you expect:
-
-    fox% grep Cpus_allowed_list /proc/self/status
-    Cpus_allowed_list:	0-191
-
-Follow [these steps](/access) if you don't have access to apex or fox.
+Follow [these steps](/access) if you don't have access to hut or fox.

 ## CUDA

@@ -96,21 +85,13 @@ Then just run `nix develop` from the same directory:
    Cuda compilation tools, release 12.4, V12.4.99
    Build cuda_12.4.r12.4/compiler.33961263_0

-## AMD uProf
-
-The [AMD uProf](https://www.amd.com/en/developer/uprof.html) performance
-analysis tool-suite is installed and ready to use.
-
-See the [AMD uProf user guide](https://docs.amd.com/r/en-US/57368-uProf-user-guide)
-for more details on how to use the tools. To use the GUI make sure that you
-connect to fox using X11 forwarding.
-
 ## Filesystems

 The machine has several file systems available.

- `/nfs/home`: The `/home` from apex via NFS, which is also shared with other
-  xeon machines. It has about 2 ms of latency, so not suitable for quick random
-  access.
+- `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity.
+  Don't abuse.
+- `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high
+  capacity. Stores three redundant copies of every file.
 - `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity.
 - `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot.
Author	SHA1	Message	Date
Rodrigo Arias Mallo	e3b9c08748	Monitor GPFS scratch partition	2025-07-11 10:20:08 +02:00
Rodrigo Arias Mallo	c94e6fa497	Monitor tent node	2025-07-11 10:20:08 +02:00
Rodrigo Arias Mallo	ba8e6e1888	Monitor GPFS home and projects partitions	2025-07-11 10:20:08 +02:00