Enable open-source NVIDIA driver in fox

It is recommended for newer graphics cards.
Remove option allowUnfree from fox and raccoon
2025-07-17 11:32:35 +02:00 · 2025-07-17 11:26:27 +02:00
54 changed files with 209 additions and 1145 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-*.pdf filter=lfs diff=lfs merge=lfs -text
--- a/doc/Intel_Server_Board_S2600WF_TPS_2_6.pdf
+++ b/doc/Intel_Server_Board_S2600WF_TPS_2_6.pdf
--- a/doc/R1000WF_SystemIntegration_and_ServiceGuide_Rev2_4.pdf
+++ b/doc/R1000WF_SystemIntegration_and_ServiceGuide_Rev2_4.pdf
--- a/doc/SEL_TroubleshootingGuide.pdf
+++ b/doc/SEL_TroubleshootingGuide.pdf
--- a/doc/bsc-ssf.pdf
+++ b/doc/bsc-ssf.pdf
--- a/flake.nix
+++ b/flake.nix
@@ -5,7 +5,6 @@
    agenix.inputs.nixpkgs.follows = "nixpkgs";
    bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
    bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
-    self.lfs = false;
  };

  outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
--- a/keys.nix
+++ b/keys.nix
@@ -16,7 +16,8 @@ rec {
  };

  hostGroup = with hosts; rec {
-    compute    = [ owl1 owl2 fox ];
+    untrusted  = [ fox ];
+    compute    = [ owl1 owl2 ];
    playground = [ eudy koro weasel ];
    storage    = [ bay lake2 ];
    monitor    = [ hut ];
@@ -30,7 +31,6 @@ rec {
  admins = {
    "rarias@hut"  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
    "rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
-    "rarias@fox"  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
    root          = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
  };
 }
--- a/m/apex/configuration.nix
+++ b/m/apex/configuration.nix
@@ -5,9 +5,7 @@
    ../common/xeon.nix
    ../common/ssf/hosts.nix
    ../module/ceph.nix
-    ../module/slurm-server.nix
    ./nfs.nix
-    ./wireguard.nix
  ];

  # Don't install grub MBR for now
@@ -72,8 +70,6 @@
      # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
      # logs. Insert as first position so we also protect SSH.
      iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
-      # Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
-      iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
    '';
  };

--- a/m/apex/nfs.nix
+++ b/m/apex/nfs.nix
@@ -8,7 +8,6 @@
    statdPort = 4000;
    exports = ''
      /home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
-      /home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
    '';
  };
  networking.firewall = {
@@ -28,21 +27,6 @@
      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001  -j nixos-fw-accept
      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002  -j nixos-fw-accept
      iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
-
-      # Accept NFS traffic from wg0
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
-      # Same but UDP
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111   -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002  -j nixos-fw-accept
-      iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
    '';
  };
 }
--- a/m/apex/wireguard.nix
+++ b/m/apex/wireguard.nix
@@ -1,35 +0,0 @@
-{ config, ... }:
-
-{
-  networking.firewall = {
-    allowedUDPPorts = [ 666 ];
-  };
-
-  age.secrets.wgApex.file = ../../secrets/wg-apex.age;
-
-  # Enable WireGuard
-  networking.wireguard.enable = true;
-  networking.wireguard.interfaces = {
-    # "wg0" is the network interface name. You can name the interface arbitrarily.
-    wg0 = {
-      ips = [ "10.106.0.30/24" ];
-      listenPort = 666;
-      privateKeyFile = config.age.secrets.wgApex.path;
-      # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
-      peers = [
-        {
-          name = "Fox";
-          publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
-          allowedIPs = [ "10.106.0.0/24" ];
-          endpoint = "fox.ac.upc.edu:666";
-          # Send keepalives every 25 seconds. Important to keep NAT tables alive.
-          persistentKeepalive = 25;
-        }
-      ];
-    };
-  };
-
-  networking.hosts = {
-    "10.106.0.1" = [ "fox" ];
-  };
-}
--- a/m/common/base.nix
+++ b/m/common/base.nix
@@ -3,7 +3,6 @@
  # Includes the basic configuration for an Intel server.
  imports = [
    ./base/agenix.nix
-    ./base/always-power-on.nix
    ./base/august-shutdown.nix
    ./base/boot.nix
    ./base/env.nix
--- a/m/common/base/always-power-on.nix
+++ b/m/common/base/always-power-on.nix
@@ -1,8 +0,0 @@
-{
-  imports = [
-    ../../module/power-policy.nix
-  ];
-
-  # Turn on as soon as we have power
-  power.policy = "always-on";
-}
--- a/m/common/base/august-shutdown.nix
+++ b/m/common/base/august-shutdown.nix
@@ -1,12 +1,12 @@
 {
-  # Shutdown all machines on August 3rd at 22:00, so we can protect the
+  # Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
  # hardware from spurious electrical peaks on the yearly electrical cut for
  # manteinance that starts on August 4th.
  systemd.timers.august-shutdown = {
-    description = "Shutdown on August 3rd for maintenance";
+    description = "Shutdown on August 2nd for maintenance";
    wantedBy = [ "timers.target" ];
    timerConfig = {
-      OnCalendar = "*-08-03 22:00:00";
+      OnCalendar = "*-08-02 11:00:00";
      RandomizedDelaySec = "10min";
      Unit = "systemd-poweroff.service";
    };
--- a/m/common/base/env.nix
+++ b/m/common/base/env.nix
@@ -4,7 +4,7 @@
  environment.systemPackages = with pkgs; [
    vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
    nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
-    ncdu config.boot.kernelPackages.perf ldns pv git-lfs
+    ncdu config.boot.kernelPackages.perf ldns pv
    # From bsckgs overlay
    osumb
  ];
--- a/m/common/base/net.nix
+++ b/m/common/base/net.nix
@@ -14,7 +14,7 @@
    nftables.enable = lib.mkForce false;

    hosts = {
-      "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
+      "84.88.53.236" = [ "apex" "ssfhead.bsc.es" "ssfhead" ];
      "84.88.51.152" = [ "raccoon" ];
      "84.88.51.142" = [ "raccoon-ipmi" ];
    };
--- a/m/common/base/users.nix
+++ b/m/common/base/users.nix
@@ -154,20 +154,6 @@
          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
        ];
      };
-
-      csiringo = {
-        # Arbitrary UID but large so it doesn't collide with other users on ssfhead.
-        uid = 9653;
-        isNormalUser = true;
-        home = "/home/Computational/csiringo";
-        description = "Cesare Siringo";
-        group = "Computational";
-        hosts = [ "apex" "weasel" ];
-        hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
-        openssh.authorizedKeys.keys = [
-          "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
-        ];
-      };
    };

    groups = {
--- a/m/fox/configuration.nix
+++ b/m/fox/configuration.nix
@@ -5,15 +5,8 @@
    ../common/base.nix
    ../common/xeon/console.nix
    ../module/emulation.nix
-    ../module/nvidia.nix
-    ../module/slurm-client.nix
-    ./wireguard.nix
  ];

-  # Don't turn off on August as UPC has different dates.
-  # Fox works fine on power cuts.
-  systemd.timers.august-shutdown.enable = false;
-
  # Select the this using the ID to avoid mismatches
  boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";

@@ -37,18 +30,6 @@

  services.openssh.settings.X11Forwarding = true;

-  services.fail2ban.enable = true;
-
-  # Use SSH tunnel to reach internal hosts
-  programs.ssh.extraConfig = ''
-    Host bscpm04.bsc.es gitlab-internal.bsc.es tent
-      ProxyJump raccoon
-    Host raccoon
-      ProxyJump apex
-      HostName 127.0.0.1
-      Port 22022
-  '';
-
  networking = {
    timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
    hostName = "fox";
@@ -72,20 +53,17 @@
    extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
  };

-  # Recommended for new graphics cards
+  # Configure Nvidia driver to use with CUDA
+  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
  hardware.nvidia.open = true;
+  hardware.graphics.enable = true;
+  nixpkgs.config.nvidia.acceptLicense = true;
+  services.xserver.videoDrivers = [ "nvidia" ];

  # Mount NVME disks
  fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
  fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };

-  # Mount the NFS home
-  fileSystems."/nfs/home" = {
-    device = "10.106.0.30:/home";
-    fsType = "nfs";
-    options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
-  };
-
  # Make a /nvme{0,1}/$USER directory for each user.
  systemd.services.create-nvme-dirs = let
    # Take only normal users in fox
@@ -102,20 +80,4 @@
    wantedBy = [ "multi-user.target" ];
    serviceConfig.ExecStart = script;
  };
-
-  # Only allow SSH connections from users who have a SLURM allocation
-  # See: https://slurm.schedmd.com/pam_slurm_adopt.html
-  security.pam.services.sshd.rules.account.slurm = {
-    control = "required";
-    enable = true;
-    modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
-    args = [ "log_level=debug5" ];
-    order = 999999; # Make it last one
-  };
-
-  # Disable systemd session (pam_systemd.so) as it will conflict with the
-  # pam_slurm_adopt.so module. What happens is that the shell is first adopted
-  # into the slurmstepd task and then into the systemd session, which is not
-  # what we want, otherwise it will linger even if all jobs are gone.
-  security.pam.services.sshd.startSession = lib.mkForce false;
 }
--- a/m/fox/wireguard.nix
+++ b/m/fox/wireguard.nix
@@ -1,46 +0,0 @@
-{ config, ... }:
-
-{
-  networking.firewall = {
-    allowedUDPPorts = [ 666 ];
-  };
-
-  age.secrets.wgFox.file = ../../secrets/wg-fox.age;
-
-  networking.wireguard.enable = true;
-  networking.wireguard.interfaces = {
-    # "wg0" is the network interface name. You can name the interface arbitrarily.
-    wg0 = {
-      # Determines the IP address and subnet of the server's end of the tunnel interface.
-      ips = [ "10.106.0.1/24" ];
-
-      # The port that WireGuard listens to. Must be accessible by the client.
-      listenPort = 666;
-
-      # Path to the private key file.
-      privateKeyFile = config.age.secrets.wgFox.path;
-      # Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
-
-      peers = [
-        # List of allowed peers.
-        { 
-          name = "Apex";
-          publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
-          # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
-          allowedIPs = [ "10.106.0.30/32" ];
-        }
-      ];
-    };
-  };
-
-  networking.hosts = {
-    "10.106.0.30" = [ "apex" ];
-  };
-
-  networking.firewall = {
-    extraCommands = ''
-      # Accept slurm connections to slurmd from apex (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
-    '';
-  };
-}
--- a/m/hut/configuration.nix
+++ b/m/hut/configuration.nix
@@ -7,9 +7,11 @@
    ../module/ceph.nix
    ../module/debuginfod.nix
    ../module/emulation.nix
+    ../module/slurm-client.nix
    ./gitlab-runner.nix
    ./monitoring.nix
    ./nfs.nix
+    ./slurm-server.nix
    ./nix-serve.nix
    ./public-inbox.nix
    ./gitea.nix
--- a/m/hut/slurm-server.nix
+++ b/m/hut/slurm-server.nix
@@ -0,0 +1,7 @@
+{ ... }:
+
+{
+  services.slurm = {
+    server.enable = true;
+  };
+}
--- a/m/hut/targets.yml
+++ b/m/hut/targets.yml
@@ -4,7 +4,7 @@
  - xeon03-ipmi
  - xeon04-ipmi
  - koro-ipmi
-  - weasel-ipmi
+  - xeon06-ipmi
  - hut-ipmi
  - eudy-ipmi
  # Storage
--- a/m/module/nvidia.nix
+++ b/m/module/nvidia.nix
@@ -1,20 +0,0 @@
-{ lib, config, pkgs, ... }:
-{
-  # Configure Nvidia driver to use with CUDA
-  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
-  hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
-  hardware.graphics.enable = true;
-  nixpkgs.config.nvidia.acceptLicense = true;
-  services.xserver.videoDrivers = [ "nvidia" ];
-
-  # enable support for derivations which require nvidia-gpu to be available
-  # > requiredSystemFeatures = [ "cuda" ];
-  programs.nix-required-mounts.enable = true;
-  programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
-  # They forgot to add the symlink
-  programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
-    config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
-  ];
-
-  environment.systemPackages = [ pkgs.cudainfo ];
-}
--- a/m/module/power-policy.nix
+++ b/m/module/power-policy.nix
@@ -1,33 +0,0 @@
-{ config, lib, pkgs, ... }:
-
-with lib;
-
-let
-  cfg = config.power.policy;
-in
-{
-  options = {
-    power.policy = mkOption {
-      type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
-      default = null;
-      description = "Set power policy to use via IPMI.";
-    };
-  };
-
-  config = mkIf (cfg != null) {
-    systemd.services."power-policy" = {
-      description = "Set power policy to use via IPMI";
-      wantedBy = [ "multi-user.target" ];
-      unitConfig = {
-        StartLimitBurst = "10";
-        StartLimitIntervalSec = "10m";
-      };
-      serviceConfig = {
-        ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
-        Type = "oneshot";
-        Restart = "on-failure";
-        RestartSec = "5s";
-      };
-    };
-  };
-}
--- a/m/module/slurm-client.nix
+++ b/m/module/slurm-client.nix
@@ -1,10 +1,33 @@
-{ lib, ... }:
+{ config, pkgs, lib, ... }:

-{
-  imports = [
-    ./slurm-common.nix
-  ];
+let
+  suspendProgram = pkgs.writeScript "suspend.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames "$1")
+    for host in $hosts; do
+      echo Shutting down host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
+    done
+  '';

+  resumeProgram = pkgs.writeScript "resume.sh" ''
+    #!/usr/bin/env bash
+    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
+    set -x
+    export "PATH=/run/current-system/sw/bin:$PATH"
+    echo "$(date) Resume invoked $0 $*" >> /var/log/power_save.log
+    hosts=$(scontrol show hostnames "$1")
+    for host in $hosts; do
+      echo Starting host: $host
+      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
+    done
+  '';
+
+in {
  systemd.services.slurmd.serviceConfig = {
    # Kill all processes in the control group on stop/restart. This will kill
    # all the jobs running, so ensure that we only upgrade when the nodes are
@@ -14,5 +37,90 @@
    KillMode = lib.mkForce "control-group";
  };

-  services.slurm.client.enable = true;
+  services.slurm = {
+    client.enable = true;
+    controlMachine = "hut";
+    clusterName = "jungle";
+    nodeName = [
+      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
+      "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
+    ];
+
+    partitionName = [
+      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
+    ];
+
+    # See slurm.conf(5) for more details about these options.
+    extraConfig = ''
+      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
+      # not with Intel MPI. For that use the compatibility shim libpmi.so
+      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
+      # library in SLURM (--mpi=pmix). See more details here:
+      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
+      MpiDefault=pmix
+
+      # When a node reboots return that node to the slurm queue as soon as it
+      # becomes operative again.
+      ReturnToService=2
+
+      # Track all processes by using a cgroup
+      ProctrackType=proctrack/cgroup
+
+      # Enable task/affinity to allow the jobs to run in a specified subset of
+      # the resources. Use the task/cgroup plugin to enable process containment.
+      TaskPlugin=task/affinity,task/cgroup
+
+      # Power off unused nodes until they are requested
+      SuspendProgram=${suspendProgram}
+      SuspendTimeout=60
+      ResumeProgram=${resumeProgram}
+      ResumeTimeout=300
+      SuspendExcNodes=hut
+
+      # Turn the nodes off after 1 hour of inactivity
+      SuspendTime=3600
+
+      # Reduce port range so we can allow only this range in the firewall
+      SrunPortRange=60000-61000
+
+      # Use cores as consumable resources. In SLURM terms, a core may have
+      # multiple hardware threads (or CPUs).
+      SelectType=select/cons_tres
+
+      # Ignore memory constraints and only use unused cores to share a node with
+      # other jobs.
+      SelectTypeParameters=CR_Core
+
+      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
+      # This sets up the "extern" step into which ssh-launched processes will be
+      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
+      # when a task runs (srun) so we can ssh early.
+      PrologFlags=Alloc,Contain,X11
+
+      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
+      # adopted by the external step, similar to tasks running in regular steps
+      # LaunchParameters=ulimit_pam_adopt
+      SlurmdDebug=debug5
+      #DebugFlags=Protocol,Cgroup
+    '';
+
+    extraCgroupConfig = ''
+      CgroupPlugin=cgroup/v2
+      #ConstrainCores=yes
+    '';
+  };
+
+  # Place the slurm config in /etc as this will be required by PAM
+  environment.etc.slurm.source = config.services.slurm.etcSlurm;
+
+  age.secrets.mungeKey = {
+    file = ../../secrets/munge-key.age;
+    owner = "munge";
+    group = "munge";
+  };
+
+  services.munge = {
+    enable = true;
+    password = config.age.secrets.mungeKey.path;
+  };
 }
--- a/m/module/slurm-common.nix
+++ b/m/module/slurm-common.nix
@@ -1,115 +0,0 @@
-{ config, pkgs, ... }:
-
-let
-  suspendProgram = pkgs.writeShellScript "suspend.sh" ''
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Shutting down host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
-    done
-  '';
-
-  resumeProgram = pkgs.writeShellScript "resume.sh" ''
-    exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
-    set -x
-    export "PATH=/run/current-system/sw/bin:$PATH"
-    echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
-    hosts=$(scontrol show hostnames $1)
-    for host in $hosts; do
-      echo Starting host: $host
-      ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
-    done
-  '';
-
-in {
-  services.slurm = {
-    controlMachine = "apex";
-    clusterName = "jungle";
-    nodeName = [
-      "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
-      "fox       Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
-    ];
-
-    partitionName = [
-      "owl Nodes=owl[1-2]     Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-      "fox Nodes=fox          Default=NO  DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
-    ];
-
-    # See slurm.conf(5) for more details about these options.
-    extraConfig = ''
-      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
-      # not with Intel MPI. For that use the compatibility shim libpmi.so
-      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
-      # library in SLURM (--mpi=pmix). See more details here:
-      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
-      MpiDefault=pmix
-
-      # When a node reboots return that node to the slurm queue as soon as it
-      # becomes operative again.
-      ReturnToService=2
-
-      # Track all processes by using a cgroup
-      ProctrackType=proctrack/cgroup
-
-      # Enable task/affinity to allow the jobs to run in a specified subset of
-      # the resources. Use the task/cgroup plugin to enable process containment.
-      TaskPlugin=task/affinity,task/cgroup
-
-      # Power off unused nodes until they are requested
-      SuspendProgram=${suspendProgram}
-      SuspendTimeout=60
-      ResumeProgram=${resumeProgram}
-      ResumeTimeout=300
-      SuspendExcNodes=fox
-
-      # Turn the nodes off after 1 hour of inactivity
-      SuspendTime=3600
-
-      # Reduce port range so we can allow only this range in the firewall
-      SrunPortRange=60000-61000
-
-      # Use cores as consumable resources. In SLURM terms, a core may have
-      # multiple hardware threads (or CPUs).
-      SelectType=select/cons_tres
-
-      # Ignore memory constraints and only use unused cores to share a node with
-      # other jobs.
-      SelectTypeParameters=CR_Core
-
-      # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
-      # This sets up the "extern" step into which ssh-launched processes will be
-      # adopted. Alloc runs the prolog at job allocation (salloc) rather than
-      # when a task runs (srun) so we can ssh early.
-      PrologFlags=Alloc,Contain,X11
-
-      # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
-      # adopted by the external step, similar to tasks running in regular steps
-      # LaunchParameters=ulimit_pam_adopt
-      SlurmdDebug=debug5
-      #DebugFlags=Protocol,Cgroup
-    '';
-
-    extraCgroupConfig = ''
-      CgroupPlugin=cgroup/v2
-      #ConstrainCores=yes
-    '';
-  };
-
-  # Place the slurm config in /etc as this will be required by PAM
-  environment.etc.slurm.source = config.services.slurm.etcSlurm;
-
-  age.secrets.mungeKey = {
-    file = ../../secrets/munge-key.age;
-    owner = "munge";
-    group = "munge";
-  };
-
-  services.munge = {
-    enable = true;
-    password = config.age.secrets.mungeKey.path;
-  };
-}
--- a/m/module/slurm-server.nix
+++ b/m/module/slurm-server.nix
@@ -1,23 +0,0 @@
-{ ... }:
-
-{
-  imports = [
-    ./slurm-common.nix
-  ];
-
-  services.slurm.server.enable = true;
-
-  networking.firewall = {
-    extraCommands = ''
-      # Accept slurm connections to controller from compute nodes
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
-      # Accept slurm connections from compute nodes for srun
-      iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
-
-      # Accept slurm connections to controller from fox (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
-      # Accept slurm connections from fox for srun (via wireguard)
-      iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
-    '';
-  };
-}
--- a/m/module/ssh-hut-extern.nix
+++ b/m/module/ssh-hut-extern.nix
@@ -1,8 +1,9 @@
 {
  programs.ssh.extraConfig = ''
-    Host apex ssfhead
+    Host ssfhead
      HostName ssflogin.bsc.es
    Host hut
-      ProxyJump apex
+      ProxyJump ssfhead
+      HostName xeon07
  '';
 }
--- a/m/raccoon/configuration.nix
+++ b/m/raccoon/configuration.nix
@@ -6,7 +6,6 @@
    ../module/emulation.nix
    ../module/debuginfod.nix
    ../module/ssh-hut-extern.nix
-    ../module/nvidia.nix
    ../eudy/kernel/perf.nix
  ];

@@ -39,7 +38,6 @@
    };
    hosts = {
      "10.0.44.4" = [ "tent" ];
-      "84.88.53.236" = [ "apex" ];
    };
  };

@@ -51,7 +49,14 @@
  # Enable performance governor
  powerManagement.cpuFreqGovernor = "performance";

-  hardware.nvidia.open = false; # Maxwell is older than Turing architecture
+  # Configure Nvidia driver to use with CUDA
+  hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
+  hardware.graphics.enable = true;
+  nixpkgs.config.nvidia.acceptLicense = true;
+  services.xserver.videoDrivers = [ "nvidia" ];
+
+  # Disable garbage collection for now
+  nix.gc.automatic = lib.mkForce false;

  services.openssh.settings.X11Forwarding = true;

--- a/m/tent/configuration.nix
+++ b/m/tent/configuration.nix
@@ -33,9 +33,6 @@
    nameservers = [ "84.88.52.35" "84.88.52.36" ];
    search = [ "bsc.es" "ac.upc.edu" ];
    defaultGateway = "10.0.44.1";
-    hosts = {
-      "84.88.53.236" = [ "apex" ];
-    };
  };

  services.p.enable = true;
--- a/m/tent/gitea.nix
+++ b/m/tent/gitea.nix
@@ -26,7 +26,5 @@
        SENDMAIL_ARGS = "--";
      };
    };
-
-    lfs.enable = true;
  };
 }
--- a/m/tent/nginx.nix
+++ b/m/tent/nginx.nix
@@ -39,7 +39,6 @@ in
          rewrite ^/git/(.*) /$1 break;
          proxy_pass http://127.0.0.1:3000;
          proxy_redirect http:// $scheme://;
-          client_max_body_size 64M;
        }
        location /cache {
          rewrite ^/cache/(.*) /$1 break;
--- a/m/weasel/configuration.nix
+++ b/m/weasel/configuration.nix
@@ -14,10 +14,6 @@
  # Users with sudo access
  users.groups.wheel.members = [ "abonerib" "anavarro" ];

-  # Run julia installed with juliaup using julia's own libraries:
-  # NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
-  programs.nix-ld.enable = true;
-
  networking = {
    hostName = "weasel";
    interfaces.eno1.ipv4.addresses = [ {
--- a/pkgs/cudainfo/Makefile
+++ b/pkgs/cudainfo/Makefile
@@ -1,12 +0,0 @@
-HOSTCXX  ?= g++
-NVCC     := nvcc -ccbin $(HOSTCXX)
-CXXFLAGS := -m64
-
-# Target rules
-all: cudainfo
-
-cudainfo: cudainfo.cpp
-	$(NVCC) $(CXXFLAGS) -o $@ $<
-
-clean:
-	rm -f cudainfo cudainfo.o
--- a/pkgs/cudainfo/cudainfo.cpp
+++ b/pkgs/cudainfo/cudainfo.cpp
@@ -1,600 +0,0 @@
-/*
- * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
-
-// Shared Utilities (QA Testing)
-
-// std::system includes
-#include <memory>
-#include <iostream>
-
-#include <cuda_runtime.h>
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
-
-// CUDA Runtime error messages
-#ifdef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error)
-{
-    switch (error)
-    {
-        case cudaSuccess:
-            return "cudaSuccess";
-
-        case cudaErrorMissingConfiguration:
-            return "cudaErrorMissingConfiguration";
-
-        case cudaErrorMemoryAllocation:
-            return "cudaErrorMemoryAllocation";
-
-        case cudaErrorInitializationError:
-            return "cudaErrorInitializationError";
-
-        case cudaErrorLaunchFailure:
-            return "cudaErrorLaunchFailure";
-
-        case cudaErrorPriorLaunchFailure:
-            return "cudaErrorPriorLaunchFailure";
-
-        case cudaErrorLaunchTimeout:
-            return "cudaErrorLaunchTimeout";
-
-        case cudaErrorLaunchOutOfResources:
-            return "cudaErrorLaunchOutOfResources";
-
-        case cudaErrorInvalidDeviceFunction:
-            return "cudaErrorInvalidDeviceFunction";
-
-        case cudaErrorInvalidConfiguration:
-            return "cudaErrorInvalidConfiguration";
-
-        case cudaErrorInvalidDevice:
-            return "cudaErrorInvalidDevice";
-
-        case cudaErrorInvalidValue:
-            return "cudaErrorInvalidValue";
-
-        case cudaErrorInvalidPitchValue:
-            return "cudaErrorInvalidPitchValue";
-
-        case cudaErrorInvalidSymbol:
-            return "cudaErrorInvalidSymbol";
-
-        case cudaErrorMapBufferObjectFailed:
-            return "cudaErrorMapBufferObjectFailed";
-
-        case cudaErrorUnmapBufferObjectFailed:
-            return "cudaErrorUnmapBufferObjectFailed";
-
-        case cudaErrorInvalidHostPointer:
-            return "cudaErrorInvalidHostPointer";
-
-        case cudaErrorInvalidDevicePointer:
-            return "cudaErrorInvalidDevicePointer";
-
-        case cudaErrorInvalidTexture:
-            return "cudaErrorInvalidTexture";
-
-        case cudaErrorInvalidTextureBinding:
-            return "cudaErrorInvalidTextureBinding";
-
-        case cudaErrorInvalidChannelDescriptor:
-            return "cudaErrorInvalidChannelDescriptor";
-
-        case cudaErrorInvalidMemcpyDirection:
-            return "cudaErrorInvalidMemcpyDirection";
-
-        case cudaErrorAddressOfConstant:
-            return "cudaErrorAddressOfConstant";
-
-        case cudaErrorTextureFetchFailed:
-            return "cudaErrorTextureFetchFailed";
-
-        case cudaErrorTextureNotBound:
-            return "cudaErrorTextureNotBound";
-
-        case cudaErrorSynchronizationError:
-            return "cudaErrorSynchronizationError";
-
-        case cudaErrorInvalidFilterSetting:
-            return "cudaErrorInvalidFilterSetting";
-
-        case cudaErrorInvalidNormSetting:
-            return "cudaErrorInvalidNormSetting";
-
-        case cudaErrorMixedDeviceExecution:
-            return "cudaErrorMixedDeviceExecution";
-
-        case cudaErrorCudartUnloading:
-            return "cudaErrorCudartUnloading";
-
-        case cudaErrorUnknown:
-            return "cudaErrorUnknown";
-
-        case cudaErrorNotYetImplemented:
-            return "cudaErrorNotYetImplemented";
-
-        case cudaErrorMemoryValueTooLarge:
-            return "cudaErrorMemoryValueTooLarge";
-
-        case cudaErrorInvalidResourceHandle:
-            return "cudaErrorInvalidResourceHandle";
-
-        case cudaErrorNotReady:
-            return "cudaErrorNotReady";
-
-        case cudaErrorInsufficientDriver:
-            return "cudaErrorInsufficientDriver";
-
-        case cudaErrorSetOnActiveProcess:
-            return "cudaErrorSetOnActiveProcess";
-
-        case cudaErrorInvalidSurface:
-            return "cudaErrorInvalidSurface";
-
-        case cudaErrorNoDevice:
-            return "cudaErrorNoDevice";
-
-        case cudaErrorECCUncorrectable:
-            return "cudaErrorECCUncorrectable";
-
-        case cudaErrorSharedObjectSymbolNotFound:
-            return "cudaErrorSharedObjectSymbolNotFound";
-
-        case cudaErrorSharedObjectInitFailed:
-            return "cudaErrorSharedObjectInitFailed";
-
-        case cudaErrorUnsupportedLimit:
-            return "cudaErrorUnsupportedLimit";
-
-        case cudaErrorDuplicateVariableName:
-            return "cudaErrorDuplicateVariableName";
-
-        case cudaErrorDuplicateTextureName:
-            return "cudaErrorDuplicateTextureName";
-
-        case cudaErrorDuplicateSurfaceName:
-            return "cudaErrorDuplicateSurfaceName";
-
-        case cudaErrorDevicesUnavailable:
-            return "cudaErrorDevicesUnavailable";
-
-        case cudaErrorInvalidKernelImage:
-            return "cudaErrorInvalidKernelImage";
-
-        case cudaErrorNoKernelImageForDevice:
-            return "cudaErrorNoKernelImageForDevice";
-
-        case cudaErrorIncompatibleDriverContext:
-            return "cudaErrorIncompatibleDriverContext";
-
-        case cudaErrorPeerAccessAlreadyEnabled:
-            return "cudaErrorPeerAccessAlreadyEnabled";
-
-        case cudaErrorPeerAccessNotEnabled:
-            return "cudaErrorPeerAccessNotEnabled";
-
-        case cudaErrorDeviceAlreadyInUse:
-            return "cudaErrorDeviceAlreadyInUse";
-
-        case cudaErrorProfilerDisabled:
-            return "cudaErrorProfilerDisabled";
-
-        case cudaErrorProfilerNotInitialized:
-            return "cudaErrorProfilerNotInitialized";
-
-        case cudaErrorProfilerAlreadyStarted:
-            return "cudaErrorProfilerAlreadyStarted";
-
-        case cudaErrorProfilerAlreadyStopped:
-            return "cudaErrorProfilerAlreadyStopped";
-
-        /* Since CUDA 4.0*/
-        case cudaErrorAssert:
-            return "cudaErrorAssert";
-
-        case cudaErrorTooManyPeers:
-            return "cudaErrorTooManyPeers";
-
-        case cudaErrorHostMemoryAlreadyRegistered:
-            return "cudaErrorHostMemoryAlreadyRegistered";
-
-        case cudaErrorHostMemoryNotRegistered:
-            return "cudaErrorHostMemoryNotRegistered";
-
-        /* Since CUDA 5.0 */
-        case cudaErrorOperatingSystem:
-            return "cudaErrorOperatingSystem";
-
-        case cudaErrorPeerAccessUnsupported:
-            return "cudaErrorPeerAccessUnsupported";
-
-        case cudaErrorLaunchMaxDepthExceeded:
-            return "cudaErrorLaunchMaxDepthExceeded";
-
-        case cudaErrorLaunchFileScopedTex:
-            return "cudaErrorLaunchFileScopedTex";
-
-        case cudaErrorLaunchFileScopedSurf:
-            return "cudaErrorLaunchFileScopedSurf";
-
-        case cudaErrorSyncDepthExceeded:
-            return "cudaErrorSyncDepthExceeded";
-
-        case cudaErrorLaunchPendingCountExceeded:
-            return "cudaErrorLaunchPendingCountExceeded";
-
-        case cudaErrorNotPermitted:
-            return "cudaErrorNotPermitted";
-
-        case cudaErrorNotSupported:
-            return "cudaErrorNotSupported";
-
-        /* Since CUDA 6.0 */
-        case cudaErrorHardwareStackError:
-            return "cudaErrorHardwareStackError";
-
-        case cudaErrorIllegalInstruction:
-            return "cudaErrorIllegalInstruction";
-
-        case cudaErrorMisalignedAddress:
-            return "cudaErrorMisalignedAddress";
-
-        case cudaErrorInvalidAddressSpace:
-            return "cudaErrorInvalidAddressSpace";
-
-        case cudaErrorInvalidPc:
-            return "cudaErrorInvalidPc";
-
-        case cudaErrorIllegalAddress:
-            return "cudaErrorIllegalAddress";
-
-        /* Since CUDA 6.5*/
-        case cudaErrorInvalidPtx:
-            return "cudaErrorInvalidPtx";
-
-        case cudaErrorInvalidGraphicsContext:
-            return "cudaErrorInvalidGraphicsContext";
-
-        case cudaErrorStartupFailure:
-            return "cudaErrorStartupFailure";
-
-        case cudaErrorApiFailureBase:
-            return "cudaErrorApiFailureBase";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-template< typename T >
-void check(T result, char const *const func, const char *const file, int const line)
-{
-    if (result)
-    {
-        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
-                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
-        cudaDeviceReset();
-        // Make sure we call CUDA Device Reset before exiting
-        exit(EXIT_FAILURE);
-    }
-}
-
-int *pArgc = NULL;
-char **pArgv = NULL;
-
-#if CUDART_VERSION < 5000
-
-// CUDA-C includes
-#include <cuda.h>
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
-{
-    CUresult error =    cuDeviceGetAttribute(attribute, device_attribute, device);
-
-    if (CUDA_SUCCESS != error) {
-        fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
-                error, __FILE__, __LINE__);
-
-        // cudaDeviceReset causes the driver to clean up all state. While
-        // not mandatory in normal operation, it is good practice.  It is also
-        // needed to ensure correct operation when the application is being
-        // profiled. Calling cudaDeviceReset causes all profile data to be
-        // flushed before the application exits
-        cudaDeviceReset();
-        exit(EXIT_FAILURE);
-    }
-}
-
-#endif /* CUDART_VERSION < 5000 */
-
-// Beginning of GPU Architecture definitions
-inline int ConvertSMVer2Cores(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct {
-        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] = {
-        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
-        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
-        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
-        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
-        {   -1, -1 }
-    };
-
-    int index = 0;
-
-    while (nGpuArchCoresPerSM[index].SM != -1) {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-
-        index++;
-    }
-
-    // If we don't find the values, we default use the previous one to run properly
-    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
-    return nGpuArchCoresPerSM[index-1].Cores;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Program main
-////////////////////////////////////////////////////////////////////////////////
-int
-main(int argc, char **argv)
-{
-    pArgc = &argc;
-    pArgv = argv;
-
-    printf("%s Starting...\n\n", argv[0]);
-    printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
-
-    int deviceCount = 0;
-    cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
-
-    if (error_id != cudaSuccess) {
-        printf("cudaGetDeviceCount failed: %s (%d)\n",
-			cudaGetErrorString(error_id), (int) error_id);
-        printf("Result = FAIL\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // This function call returns 0 if there are no CUDA capable devices.
-    if (deviceCount == 0)
-        printf("There are no available device(s) that support CUDA\n");
-    else
-        printf("Detected %d CUDA Capable device(s)\n", deviceCount);
-
-    int dev, driverVersion = 0, runtimeVersion = 0;
-
-    for (dev = 0; dev < deviceCount; ++dev) {
-        cudaSetDevice(dev);
-        cudaDeviceProp deviceProp;
-        cudaGetDeviceProperties(&deviceProp, dev);
-
-        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
-
-        // Console log
-        cudaDriverGetVersion(&driverVersion);
-        cudaRuntimeGetVersion(&runtimeVersion);
-        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
-        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);
-
-        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n",
-                (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
-
-        printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
-               deviceProp.multiProcessorCount,
-               ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
-               ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
-        printf("  GPU Max Clock rate:                            %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
-
-
-#if CUDART_VERSION >= 5000
-        // This is supported in CUDA 5.0 (runtime API device properties)
-        printf("  Memory Clock rate:                             %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
-        printf("  Memory Bus Width:                              %d-bit\n",   deviceProp.memoryBusWidth);
-
-        if (deviceProp.l2CacheSize) {
-            printf("  L2 Cache Size:                                 %d bytes\n", deviceProp.l2CacheSize);
-        }
-
-#else
-        // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
-        int memoryClock;
-        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
-        printf("  Memory Clock rate:                             %.0f Mhz\n", memoryClock * 1e-3f);
-        int memBusWidth;
-        getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
-        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
-        int L2CacheSize;
-        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
-
-        if (L2CacheSize) {
-            printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
-        }
-
-#endif
-
-        printf("  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
-               deviceProp.maxTexture1D   , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
-               deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
-        printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
-               deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
-        printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n",
-               deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
-
-
-        printf("  Total amount of constant memory:               %lu bytes\n", deviceProp.totalConstMem);
-        printf("  Total amount of shared memory per block:       %lu bytes\n", deviceProp.sharedMemPerBlock);
-        printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
-        printf("  Warp size:                                     %d\n", deviceProp.warpSize);
-        printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor);
-        printf("  Maximum number of threads per block:           %d\n", deviceProp.maxThreadsPerBlock);
-        printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
-               deviceProp.maxThreadsDim[0],
-               deviceProp.maxThreadsDim[1],
-               deviceProp.maxThreadsDim[2]);
-        printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",
-               deviceProp.maxGridSize[0],
-               deviceProp.maxGridSize[1],
-               deviceProp.maxGridSize[2]);
-        printf("  Maximum memory pitch:                          %lu bytes\n", deviceProp.memPitch);
-        printf("  Texture alignment:                             %lu bytes\n", deviceProp.textureAlignment);
-        printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
-        printf("  Run time limit on kernels:                     %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
-        printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No");
-        printf("  Support host page-locked memory mapping:       %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
-        printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
-        printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
-#endif
-        printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
-        printf("  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
-
-        const char *sComputeMode[] = {
-            "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
-            "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
-            "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
-            "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
-            "Unknown",
-            NULL
-        };
-        printf("  Compute Mode:\n");
-        printf("     < %s >\n", sComputeMode[deviceProp.computeMode]);
-    }
-
-    // If there are 2 or more GPUs, query to determine whether RDMA is supported
-    if (deviceCount >= 2)
-    {
-        cudaDeviceProp prop[64];
-        int gpuid[64]; // we want to find the first two GPU's that can support P2P
-        int gpu_p2p_count = 0;
-
-        for (int i=0; i < deviceCount; i++)
-        {
-            checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
-
-            // Only boards based on Fermi or later can support P2P
-            if ((prop[i].major >= 2)
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-                // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
-                && prop[i].tccDriver
-#endif
-               )
-            {
-                // This is an array of P2P capable GPUs
-                gpuid[gpu_p2p_count++] = i;
-            }
-        }
-
-        // Show all the combinations of support P2P GPUs
-        int can_access_peer_0_1, can_access_peer_1_0;
-
-        if (gpu_p2p_count >= 2)
-        {
-            for (int i = 0; i < gpu_p2p_count-1; i++)
-            {
-                for (int j = 1; j < gpu_p2p_count; j++)
-                {
-                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
-                    printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
-                           prop[gpuid[j]].name, gpuid[j] ,
-                           can_access_peer_0_1 ? "Yes" : "No");
-                }
-            }
-
-            for (int j = 1; j < gpu_p2p_count; j++)
-            {
-                for (int i = 0; i < gpu_p2p_count-1; i++)
-                {
-                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
-                    printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
-                           prop[gpuid[i]].name, gpuid[i] ,
-                           can_access_peer_1_0 ? "Yes" : "No");
-                }
-            }
-        }
-    }
-
-    // csv masterlog info
-    // *****************************
-    // exe and CUDA driver name
-    printf("\n");
-    std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
-    char cTemp[128];
-
-    // driver version
-    sProfileString += ", CUDA Driver Version = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
-#else
-    sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
-#endif
-    sProfileString +=  cTemp;
-
-    // Runtime version
-    sProfileString += ", CUDA Runtime Version = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
-#else
-    sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
-#endif
-    sProfileString +=  cTemp;
-
-    // Device count
-    sProfileString += ", NumDevs = ";
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    sprintf_s(cTemp, 10, "%d", deviceCount);
-#else
-    sprintf(cTemp, "%d", deviceCount);
-#endif
-    sProfileString += cTemp;
-
-    // Print Out all device Names
-    for (dev = 0; dev < deviceCount; ++dev)
-    {
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        sprintf_s(cTemp, 13, ", Device%d = ", dev);
-#else
-        sprintf(cTemp, ", Device%d = ", dev);
-#endif
-        cudaDeviceProp deviceProp;
-        cudaGetDeviceProperties(&deviceProp, dev);
-        sProfileString += cTemp;
-        sProfileString += deviceProp.name;
-    }
-
-    sProfileString += "\n";
-    printf("%s", sProfileString.c_str());
-
-    printf("Result = PASS\n");
-
-    // finish
-    // cudaDeviceReset causes the driver to clean up all state. While
-    // not mandatory in normal operation, it is good practice.  It is also
-    // needed to ensure correct operation when the application is being
-    // profiled. Calling cudaDeviceReset causes all profile data to be
-    // flushed before the application exits
-    cudaDeviceReset();
-    return 0;
-}
--- a/pkgs/cudainfo/default.nix
+++ b/pkgs/cudainfo/default.nix
@@ -1,43 +0,0 @@
-{
-  stdenv
-, cudatoolkit
-, cudaPackages
-, autoAddDriverRunpath
-, strace
-}:
-
-stdenv.mkDerivation (finalAttrs: {
-  name = "cudainfo";
-  src = ./.;
-  buildInputs = [
-    cudatoolkit # Required for nvcc
-    cudaPackages.cuda_cudart.static # Required for -lcudart_static
-    autoAddDriverRunpath
-  ];
-  installPhase = ''
-    mkdir -p $out/bin
-    cp -a cudainfo $out/bin
-  '';
-  passthru.gpuCheck = stdenv.mkDerivation {
-    name = "cudainfo-test";
-    requiredSystemFeatures = [ "cuda" ];
-    dontBuild = true;
-    nativeCheckInputs = [
-      finalAttrs.finalPackage # The cudainfo package from above
-      strace # When it fails, it will show the trace
-    ];
-    dontUnpack = true;
-    doCheck = true;
-    checkPhase = ''
-      if ! cudainfo; then
-        set -x
-        cudainfo=$(command -v cudainfo)
-        ldd $cudainfo
-        readelf -d $cudainfo
-        strace -f $cudainfo
-        set +x
-      fi
-    '';
-    installPhase = "touch $out";
-  };
-})
--- a/pkgs/overlay.nix
+++ b/pkgs/overlay.nix
@@ -52,5 +52,4 @@ final: prev:
  prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
  meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
  upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
-  cudainfo = prev.callPackage ./cudainfo/default.nix { };
 }
--- a/secrets/ceph-user.age
+++ b/secrets/ceph-user.age
--- a/secrets/gitea-runner-token.age
+++ b/secrets/gitea-runner-token.age
@@ -1,13 +1,11 @@
 age-encryption.org/v1
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
-DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
-hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
-hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
-eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
-/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
-<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>%	<09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG<0B><><EFBFBD>
+-> ssh-ed25519 HY2yRg d7+nvfAcdC3GjJxipXFrsfGGyP5jAY+gRWRV+4FVYAM
+CG7r0bRGgnUWcdfDnpe7HwZ3L/y7b5iuJuqvf15b3/Y
+-> ssh-ed25519 CAWG4Q X0vITOErz4wkR3VQYOcVlnrkHtwe+ytdZz1Hcrs4vVs
+6IWYOhXLQ+BnML9YfLLHJYEO2CZ/uEc9IBqhoWvjDHI
+-> ssh-ed25519 xA739A p5e/0AJtZ0+zbRvkB/usLuxusY8xXRx9Ksi/LQlcIHw
+M4S/qlzT9POyJx4gY9lmycstUcdwG2cinN4OlV22zzo
+-> ssh-ed25519 MSF3dg Ydl7uBWzBx6sAaxbzC3x8qiaU3ysGqV4rUFLpHCEV30
+/1AUHBhCNOs9i7LJbmzwQDHsu+ybzYf6+coztKk5E3U
+--- kYt15WxClpT7PXD1oFe9GqJU+OswjH7y9wIc8/GzZ7M
+<EFBFBD><EFBFBD>h<>ߓ<><DF93><EFBFBD>`<60><><EFBFBD>V4F<34><46>_k)^<5E>m$uj:ѳ<><D1B3><17><><EFBFBD>}<7D>Z]$U]<12>u<EFBFBD> <20>0<EFBFBD><30><EFBFBD>v8<76>?<3F>X<EFBFBD>P<EFBFBD>g%d<>#<23>d9{rAi<41><69>
--- a/secrets/gitlab-bsc-docker-token.age
+++ b/secrets/gitlab-bsc-docker-token.age
--- a/secrets/gitlab-runner-docker-token.age
+++ b/secrets/gitlab-runner-docker-token.age
--- a/secrets/gitlab-runner-shell-token.age
+++ b/secrets/gitlab-runner-shell-token.age
--- a/secrets/ipmi.yml.age
+++ b/secrets/ipmi.yml.age
--- a/secrets/jungle-robot-password.age
+++ b/secrets/jungle-robot-password.age
--- a/secrets/munge-key.age
+++ b/secrets/munge-key.age
--- a/secrets/nix-serve.age
+++ b/secrets/nix-serve.age
--- a/secrets/secrets.nix
+++ b/secrets/secrets.nix
@@ -2,8 +2,6 @@ let
  keys = import ../keys.nix;
  adminsKeys = builtins.attrValues keys.admins;
  hut = [ keys.hosts.hut ] ++ adminsKeys;
-  fox = [ keys.hosts.fox ] ++ adminsKeys;
-  apex = [ keys.hosts.apex ] ++ adminsKeys;
  mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
  tent = [ keys.hosts.tent ] ++ adminsKeys;
  # Only expose ceph keys to safe nodes and admins
@@ -26,7 +24,4 @@ in

  "ceph-user.age".publicKeys = safe;
  "munge-key.age".publicKeys = safe;
-
-  "wg-fox.age".publicKeys = fox;
-  "wg-apex.age".publicKeys = apex;
 }
--- a/secrets/tent-gitlab-runner-bsc-docker-token.age
+++ b/secrets/tent-gitlab-runner-bsc-docker-token.age
@@ -1,13 +1,11 @@
 age-encryption.org/v1
-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0
-CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ
-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4
-JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4
-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg
-IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc
-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg
-fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng
-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0
-pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE
--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8
-<EFBFBD>\ֺ"^<5E>DT<44>H<EFBFBD><48>3<EFBFBD><33><EFBFBD>_|.h<0E><><EFBFBD><EFBFBD><03>^<5E>n<14><0E><><EFBFBD><EFBFBD><1A>g<EFBFBD>S<EFBFBD>]_<><5F>?n<>z~2<>!<21>p7<70><37><<3C><14>ʨD?<3F>~<02>F<EFBFBD>$<24>`<60>q+<2B><><EFBFBD>SW<53>(+<2B><>P<EFBFBD>c<1E>u[<5B>m<EFBFBD>`O<>ܛ<EFBFBD>ϖT
+-> ssh-ed25519 G5LX5w HlQ4V8lBd3im5j8KHEuQZBTuztvPj1QoWdv6FL6qzGI
+Jpt91X1UIIVFQt1X6Q//kALn+Cetp/LqBZZvTuhFthw
+-> ssh-ed25519 CAWG4Q StnngJAcuAwUnTrXDR3nJ2KFN0jNdTqSz+/1TfmWkzA
+CR4AQ6fqaJVY1mdUIX1gzaZwRs1sU8F8hHztnkN8vN0
+-> ssh-ed25519 xA739A xya5A5t63Owx+VrGgUfV/lIP8b/xV1cerMpuZBLaDVM
+w+pA583yUnFq2AvGBGzWbQIGQEY9WqW0CSLQ9v+SG0c
+-> ssh-ed25519 MSF3dg aXkLxCyYdOwVopHHmpXEI6WlAIizKdJi4IO0KEdhS3s
+WKXkTszZN66+QZdSDJ4D9q7xgYWMfliOLCubIF2Dqkc
+--- uVWoU2lMkqQ/9Z0BqKRCeUpsKi8lwmHukT/FV8wYMbg
+<EFBFBD><EFBFBD>1G+<2B>6<EFBFBD><36>g[|x]2T<32>й<EFBFBD><D0B9><EFBFBD> <20>CKu)<29><><EFBFBD>]<5D><>8֓<38><D693><EFBFBD><EFBFBD>l<EFBFBD><6C>S<EFBFBD><53><EFBFBD>Q<EFBFBD><07><>x<EFBFBD><78><EFBFBD><EFBFBD>#7r<37>k{*<2A><>3ս~C<>b<EFBFBD><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڵ<EFBFBD>Np<1E><05>]J]h<>je+d%Е<>#<23>m<EFBFBD>?=6}<7D> 
--- a/secrets/tent-gitlab-runner-pm-docker-token.age
+++ b/secrets/tent-gitlab-runner-pm-docker-token.age
@@ -1,13 +1,11 @@
 age-encryption.org/v1
-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM
-vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE
-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI
-/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng
-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE
-JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho
-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA
-msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ
-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec
-SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE
--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o
-<EFBFBD><EFBFBD>f#<23>,|<7C>Ey.v<>DL<44>Ӻ<05>JPX<50><07><>`<60><><EFBFBD><EFBFBD>-#<23>F<EFBFBD>Ubs<62>(Q!?<3F><1A>#xJG?5<><35><EFBFBD><EFBFBD><EFBFBD>~<7E><>6MA<15>U<><55><EFBFBD>C<01><>M<>$+}W<>NϨG!<21><><EFBFBD><EFBFBD>a<EFBFBD><61><EFBFBD><EFBFBD>%<25>ǽ<EFBFBD>G
+-> ssh-ed25519 G5LX5w sg9SmahxBg35MDIxhrp4oHkaTaxsKoVQju2eNhCt0BM
+CZ64dEGqz2tbkG8KtimZvLUEMrQpVVBJP7Fu46WTMgc
+-> ssh-ed25519 CAWG4Q jzS1R14W1CWxdziMLG/yCGPLWSkiyE+9lqyCVe491ng
+acJo/nhKq3pSPoFEPaFLN1fzHHbEzstNoLtohWAHKiM
+-> ssh-ed25519 xA739A qeGJoLeSIQwLU2Yg+Gi2bikHJ3HscLfyo1msqL3JwHw
+tTwaxRBKTl/SoyY/LnxR/j/5WvCNX5VeZLKi018YMrY
+-> ssh-ed25519 MSF3dg Wym7Uyf1XvH1H6mNDERkO8opkMiN0zzXm2PjXftEOWs
+Uw8ZwwKIB5UqgVuoSLE2QajNDJZkH7/Y3Nsy+WFl7Xs
+--- 94hGVbYiCGZdMEJesCMLh7IZi+w5l/Kr1lZJHQgrc0o
+j5j磛<6A><04><>J<EFBFBD><4A><EFBFBD>a<EFBFBD>]<5D>a%dr<64><72>FDT<44><54>^<5E><>Q<EFBFBD>s/<2F>kwB<77>$<24><>$<24><>H<EFBFBD>'<27><><EFBFBD><EFBFBD><EFBFBD>w<14><?^|<7C><07>h$<24>ؗ<EFBFBD>GI<47>ĕsT2RU<52><55>*/O<>7<EFBFBD><37><EFBFBD>G<EFBFBD>pͪ<70>4<EFBFBD><34><EFBFBD>M9<4D>j<><06>
--- a/secrets/tent-gitlab-runner-pm-shell-token.age
+++ b/secrets/tent-gitlab-runner-pm-shell-token.age
@@ -1,13 +1,12 @@
 age-encryption.org/v1
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
-RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
-Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
-FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
-sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
-8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>
+-> ssh-ed25519 G5LX5w 5K0mzfJGvAB2LGmoQ9ZLbWooVEX6F4+fQdo1JUoB3FM
+AKGa507bUrYjXFaMQ1MXTDBFYsdS6zbs+flmxYN0UNo
+-> ssh-ed25519 CAWG4Q 8KzLc949on8iN1pK8q11OpCIeO71t6b0zxCLHhcQ6ns
+uy7z6RdIuoUes+Uap3k5eoFFuu/DcSrEBwq4V4C/ygc
+-> ssh-ed25519 xA739A SLx5cKo0fdAHj+cLpJ4FYTWTUTyDsCqKQOufDu3xnGo
+VnS/WsiSaf6RpXuhgfij4pYu4p9hlJl1oXrfYY9rKlQ
+-> ssh-ed25519 MSF3dg c5ZXvdNxNfZU3HeWsttuhy+UC5JxWN/IFuCuCGbksn4
+vcKlIirf+VvERX71YpmwW6zp6ClhlG2PR4R8LIN7cQo
+--- pJKICDaYAlxqNnvHIuzB3Yk7tv0ZNYflGTQD+Zk/8+4
+<EFBFBD>h/\J<>J
+<EFBFBD>0?<3F> <20>p<EFBFBD><70><EFBFBD>@܉7<DC89><37>3<EFBFBD><33><EFBFBD><EFBFBD>z<EFBFBD><7A><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>a<EFBFBD><61>'<27>,ka<6B>I<EFBFBD>XXOZ<4F>I\<5C><><EFBFBD><EFBFBD><EFBFBD>	<09>BP<42><50>/cUɿ~B<><42>S'Q<><51><EFBFBD><EFBFBD>f<06><><EFBFBD>er<65><72><EFBFBD><EFBFBD>^<5E><><EFBFBD><EFBFBD>8l<38><6C>V<EFBFBD>E<EFBFBD><45><EFBFBD>
--- a/secrets/vpn-dac-client-key.age
+++ b/secrets/vpn-dac-client-key.age
--- a/secrets/vpn-dac-login.age
+++ b/secrets/vpn-dac-login.age
@@ -1,14 +1,12 @@
 age-encryption.org/v1
-> ssh-ed25519 G5LX5w SRJhNenoQXbT1FgX3TMPnVH5P6oe2eHot+M1YsEjsEk
-hfTSLgKi98Eh7JK5o7x2POpTEtQlQCpEa3keUFYCuME
-> ssh-ed25519 cK5kHw z5TwWJTkvx7HztjXHJW/aCOtOfPrQaLP0gyIT7rXcyU
-b4NCpHfasgvkLLr+6LcWUl60p59aSNnfp3bl2OFYXo0
-> ssh-ed25519 CAWG4Q 4VpS1/OnFe8nxcQbRTKNhjsh/ZQ5cbhSMXwK/jjQ+3o
-WF9wvOkqVml4UcEzyzeumKuUwCwwr2zvKLMg+PCB8nk
-> ssh-ed25519 xA739A 67FhuJ070jBVMt/xbKHWhfri6iIm0FyaFvzQabsvFBM
-1G5/913dDv/r/6p1x/c5YiUnZzrX/LvIj33KW+PN0KU
-> ssh-ed25519 MSF3dg Bj/yB4N2wkyHCHC22tcjjJAA4ebSamN0Z4UVX3ZnryI
-6D/ZgTs+j+MGDAbPU5zyK0i9zN6tQy68IcOnQZ27mYg
--- 169erk3ICSYLs4FPEuXCn7QlekWhsmSn0Lr+/R14I5Q
-<EFBFBD><EFBFBD><EFBFBD><EFBFBD><05>ҽ3<D2BD>s<EFBFBD>
-w<EFBFBD><EFBFBD>4D<EFBFBD><EFBFBD>b.<2E><><EFBFBD>"|<7C><><EFBFBD>)"<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>;<3B>.<2E>ɫ7)<29>LeC<05>=S؟
+-> ssh-ed25519 G5LX5w /RF8uZ/KahUqjEFILbF3+Jin+U0SQdoQChcc9RJ9axc
+aEmPk++86nBR6d2BIa/oaUdyiLS6cH8TUoYJE3bxba4
+-> ssh-ed25519 CAWG4Q qHyh9nQi8c3z/KHby9y5vhzN0Dwz0zca98ebjJmXrzs
+ZbmwNzrSSQ3RvskE8SqcBa0vMy8pzm/HPGHLm5zuPGQ
+-> ssh-ed25519 xA739A FlGbfS4bUxA3gVDzb3yPjp4hV8a7aiNBLUctnN3bGEY
+3fI6SyVjVhh2M8uc/XV3blpdQMPMYi2qzaHNXvx0bvM
+-> ssh-ed25519 MSF3dg 0Bs/aW0nNISS+93It75o6hKZWa7S+LF5bF5ApsJ2fQ8
+y7o0KYDHEen13ndIxg/mYil3eMxxzvYF2pWqhMb+rBU
+--- Iqo75G4+02Y9nc1OOkcEx+iQlKnGYCekAx76tRH53wA
+<10>
+<EFBFBD>X<EFBFBD><EFBFBD>%f<0C><><12>hX<0B><>R<>c<EFBFBD>+z<><7A>eg<65>& <20>d<EFBFBD><64><EFBFBD>ק<06><>A<EFBFBD><41><EFBFBD>чXM<58>1<EFBFBD>
--- a/secrets/wg-apex.age
+++ b/secrets/wg-apex.age
--- a/secrets/wg-fox.age
+++ b/secrets/wg-fox.age
@@ -1,14 +0,0 @@
-age-encryption.org/v1
-> ssh-ed25519 cDBabA heyW9/cxgwFX9IexQIXjAQDWGQPNcMXcArQp2Rxsqx4
-o9MQ7EH8PDDjsJdpH9F3Xq2zUoaDAJQlfFmYucSFs6Y
-> ssh-ed25519 cK5kHw Sza4pos7K3qW3omEeyidI/jszJNf9smemSZnUJfCIww
-D6vazXki7hIYraIuSiGPS+FPbkFUwHhHWDf52OhEIMg
-> ssh-ed25519 CAWG4Q YexIHueOIMmIN8JIDyNUOKBkyz/k18HqV3hTXh48KlM
-xh8UJzzWT6ByN+Dpn4JrMNsjGC/uc/v6LynwjBDz9NQ
-> ssh-ed25519 xA739A KySG3TXdqfCMUkVEDGa74B0op745s3XGYxFLyAXSQAc
-5EI/yb5ctW9Qu18bHm3/sK97kwGcKzzmWvPSCWm89XA
-> ssh-ed25519 MSF3dg MNxnNj0fHmri8ophexXPNjRUBUWrzcuk5S1mucxUMTE
-GVFWXtISEU8ZmlwL4nh4weAgfGrt2GHX0DTzbpS6zg8
--- UdrqkYG2ZApAuwdZeNhC50NP2rkD/Ol6y8nJa4RHx7Y
-<EFBFBD>ܻ<EFBFBD>m(<28><><EFBFBD>><3E>H<48>Y87<><37>G<0F>+*<12><><EFBFBD><EFBFBD>9V<>.<2E><><EFBFBD><EFBFBD><03><><EFBFBD>p<EFBFBD>Oo<4F>=+哇<>P0<50><30>{<7B>)<29><17><><EFBFBD><EFBFBD>><3E>z3P^
-u
--- a/web/content/fox/_index.md
+++ b/web/content/fox/_index.md
@@ -21,28 +21,17 @@ the detailed specifications:

 ## Access

-To access the machine, request a SLURM session from [apex](/apex) using the `fox`
-partition. If you need the machine for performance measurements, use an
-exclusive reservation:
+To access the machine, request a SLURM session from [hut](/hut) using the `fox`
+partition:

-    apex% salloc -p fox --exclusive
+    hut% salloc -p fox

-Otherwise, specify the CPUs that you need so other users can also use the node
-at the same time:
+Then connect via ssh:

-    apex% salloc -p fox -c 8
-
-Then use srun to execute an interactive shell:
-
-    apex% srun --pty $SHELL
+    hut% ssh fox
    fox%

-Make sure you get all CPUs you expect:
-
-    fox% grep Cpus_allowed_list /proc/self/status
-    Cpus_allowed_list:	0-191
-
-Follow [these steps](/access) if you don't have access to apex or fox.
+Follow [these steps](/access) if you don't have access to hut or fox.

 ## CUDA

@@ -100,8 +89,9 @@ Then just run `nix develop` from the same directory:

 The machine has several file systems available.

-- `/nfs/home`: The `/home` from apex via NFS, which is also shared with other
-  xeon machines. It has about 2 ms of latency, so not suitable for quick random
-  access.
+- `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity,
+  so don't abuse it.
+- `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high
+  capacity. Stores three redundant copies of every file.
 - `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity.
 - `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot.
Author	SHA1	Message	Date
Rodrigo Arias Mallo	59cc987954	Enable open source NVidia driver in fox It is recommended for newer versions.	2025-07-17 11:32:35 +02:00
Rodrigo Arias Mallo	d3ddb3e1a5	Remove option allowUnfree from fox and raccoon It is already set to true for all machines.	2025-07-17 11:26:27 +02:00