From 52eed708f08d3b3f50e0dfb8f514087bbcfc91f1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Fri, 31 Mar 2023 18:27:25 +0200 Subject: [PATCH 001/472] Add initial configuration --- boot.nix | 16 +++++++++++++ configuration.nix | 42 ++++++++++++++++++++++++++++++++++ fs.nix | 10 +++++++++ gitlab-runner.nix | 46 ++++++++++++++++++++++++++++++++++++++ hardware-configuration.nix | 37 ++++++++++++++++++++++++++++++ net.nix | 26 +++++++++++++++++++++ ssh.nix | 24 ++++++++++++++++++++ users.nix | 15 +++++++++++++ 8 files changed, 216 insertions(+) create mode 100644 boot.nix create mode 100644 configuration.nix create mode 100644 fs.nix create mode 100644 gitlab-runner.nix create mode 100644 hardware-configuration.nix create mode 100644 net.nix create mode 100644 ssh.nix create mode 100644 users.nix diff --git a/boot.nix b/boot.nix new file mode 100644 index 0000000..033e9b3 --- /dev/null +++ b/boot.nix @@ -0,0 +1,16 @@ +{ ... }: + +{ + # Use the GRUB 2 boot loader. + boot.loader.grub.enable = true; + boot.loader.grub.version = 2; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + + # Enable serial console + boot.kernelParams = [ + "console=tty1" + "console=ttyS0,115200" + ]; +} diff --git a/configuration.nix b/configuration.nix new file mode 100644 index 0000000..4a20d1d --- /dev/null +++ b/configuration.nix @@ -0,0 +1,42 @@ +{ config, pkgs, ... 
}: + +{ + imports = [ + ./hardware-configuration.nix + + ./boot.nix + ./fs.nix + ./gitlab-runner.nix + ./net.nix + ./ssh.nix + ./users.nix + ]; + + systemd.services."serial-getty@ttyS0" = { + enable = true; + wantedBy = [ "getty.target" ]; + serviceConfig.Restart = "always"; + }; + + time.timeZone = "Europe/Madrid"; + i18n.defaultLocale = "en_US.UTF-8"; + + environment.systemPackages = with pkgs; [ + vim wget git htop + ]; + + nix.settings.experimental-features = [ "nix-command" "flakes" ]; + + # Copy the NixOS configuration file and link it from the resulting system + # (/run/current-system/configuration.nix). This is useful in case you + # accidentally delete configuration.nix. + system.copySystemConfiguration = true; + + # This value determines the NixOS release from which the default + # settings for stateful data, like file locations and database versions + # on your system were taken. It‘s perfectly fine and recommended to leave + # this value at the release version of the first install of this system. + # Before changing this value read the documentation for this option + # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html). + system.stateVersion = "22.11"; # Did you read the comment? +} diff --git a/fs.nix b/fs.nix new file mode 100644 index 0000000..7a459dd --- /dev/null +++ b/fs.nix @@ -0,0 +1,10 @@ +{ ... }: + +{ + # Mount the home via NFS + fileSystems."/home" = { + device = "10.0.40.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" ]; + }; +} diff --git a/gitlab-runner.nix b/gitlab-runner.nix new file mode 100644 index 0000000..67c400d --- /dev/null +++ b/gitlab-runner.nix @@ -0,0 +1,46 @@ +{ pkgs, lib, config, ... }: + +{ + services.gitlab-runner = { + enable = true; + services = { + # runner for executing stuff on host system (very insecure!) + # make sure to add required packages (including git!) 
+ # to `environment.systemPackages` + shell = { + # File should contain at least these two variables: + # `CI_SERVER_URL` + # `REGISTRATION_TOKEN` + registrationConfigFile = "/run/secrets/gitlab-runner-registration"; + executor = "shell"; + tagList = [ "nix" "xeon" ]; + environmentVariables = { + SHELL = "${pkgs.bash}/bin/bash"; + }; + }; + # # runner for everything else + # default = { + # # File should contain at least these two variables: + # # `CI_SERVER_URL` + # # `REGISTRATION_TOKEN` + # registrationConfigFile = "/run/secrets/gitlab-runner-registration"; + # dockerImage = "debian:stable"; + # }; + }; + }; + + #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; + systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; + systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; + systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner"; + + users.users.gitlab-runner = { + uid = config.ids.uids.gitlab-runner; + #isNormalUser = true; + home = "/var/lib/gitlab-runner"; + description = "Gitlab Runner"; + group = "gitlab-runner"; + createHome = true; + }; + users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; +} diff --git a/hardware-configuration.nix b/hardware-configuration.nix new file mode 100644 index 0000000..c69f868 --- /dev/null +++ b/hardware-configuration.nix @@ -0,0 +1,37 @@ +# Do not modify this file! It was generated by ‘nixos-generate-config’ +# and may be overwritten by future invocations. Please make changes +# to /etc/nixos/configuration.nix instead. +{ config, lib, pkgs, modulesPath, ... 
}: + +{ + imports = + [ (modulesPath + "/installer/scan/not-detected.nix") + ]; + + boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ]; + boot.initrd.kernelModules = [ ]; + boot.kernelModules = [ "kvm-intel" ]; + boot.extraModulePackages = [ ]; + + fileSystems."/" = + { device = "/dev/disk/by-uuid/289f78d9-b339-47de-b321-0a6796b9a79b"; + fsType = "ext4"; + }; + + swapDevices = + [ { device = "/dev/disk/by-uuid/2bac02f9-7ea1-4868-9536-23710f19baca"; } + ]; + + # Enables DHCP on each ethernet and wireless interface. In case of scripted networking + # (the default) this is the recommended approach. When using systemd-networkd it's + # still possible to use this option, but it's recommended to use it in conjunction + # with explicit per-interface declarations with `networking.interfaces.<name>.useDHCP`. + networking.useDHCP = lib.mkDefault true; + # networking.interfaces.eth0.useDHCP = lib.mkDefault true; + # networking.interfaces.eth1.useDHCP = lib.mkDefault true; + # networking.interfaces.ib0.useDHCP = lib.mkDefault true; + + nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; + powerManagement.cpuFreqGovernor = lib.mkDefault "powersave"; + hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; +} diff --git a/net.nix b/net.nix new file mode 100644 index 0000000..a9ae546 --- /dev/null +++ b/net.nix @@ -0,0 +1,26 @@ +{ ... }: + +{ + networking = { + hostName = "xeon07"; + + useDHCP = false; + defaultGateway = "10.0.40.30"; + nameservers = ["8.8.8.8"]; + interfaces.eno1.useDHCP = false; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.7"; + prefixLength = 24; + } ]; + + proxy = { + default = "http://localhost:23080/"; + noProxy = "127.0.0.1,localhost,internal.domain"; + }; + + firewall = { + enable = true; + allowedTCPPorts = [ 22 80 443 ]; + }; + }; +} diff --git a/ssh.nix b/ssh.nix new file mode 100644 index 0000000..3c97d4c --- /dev/null +++ b/ssh.nix @@ -0,0 +1,24 @@ +{ ... 
}: + +{ + # Enable the OpenSSH daemon. + services.openssh.enable = true; + + # Connect to intranet git hosts via proxy + programs.ssh.extraConfig = '' + Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es + User git + ProxyCommand nc -X connect -x localhost:23080 %h %p + ''; + + # Authorize keys + users.users = { + root.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" ]; + rarias.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" ]; + }; + + programs.ssh.knownHosts = { + "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; + "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; + }; +} diff --git a/users.nix b/users.nix new file mode 100644 index 0000000..617ff5e --- /dev/null +++ b/users.nix @@ -0,0 +1,15 @@ +{ ... 
}: + +{ + users = { + mutableUsers = false; + users.rarias = { + uid = 1880; + isNormalUser = true; + home = "/home/Computational/rarias"; + description = "Rodrigo Arias"; + extraGroups = [ "wheel" ]; + hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; + }; + }; +} -- 2.49.0 From df371c950fc164dc2a75d2a43cb5390d9fd6bfbe Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Mon, 3 Apr 2023 12:51:44 +0200 Subject: [PATCH 002/472] Setup slurm and gitlab-runner --- boot.nix | 18 +++++++++- configuration.nix | 10 ++++-- fs.nix | 8 ++++- gitlab-runner.nix | 40 ++++++++++++++++----- net.nix | 68 ++++++++++++++++++++++++++++++++++-- overlays-compat/overlays.nix | 8 +++++ overlays.nix | 25 +++++++++++++ slurm.nix | 12 +++++++ users.nix | 5 +++ 9 files changed, 180 insertions(+), 14 deletions(-) create mode 100644 overlays-compat/overlays.nix create mode 100644 overlays.nix create mode 100644 slurm.nix diff --git a/boot.nix b/boot.nix index 033e9b3..56a2a31 100644 --- a/boot.nix +++ b/boot.nix @@ -1,4 +1,4 @@ -{ ... }: +{ lib, ... }: { # Use the GRUB 2 boot loader. 
@@ -8,9 +8,25 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + # Enable GRUB2 serial console + boot.loader.grub.extraConfig = '' + serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1 + terminal_input --append serial + terminal_output --append serial + ''; + # Enable serial console boot.kernelParams = [ "console=tty1" "console=ttyS0,115200" ]; + + boot.kernelPatches = lib.singleton { + name = "osnoise-tracer"; + patch = null; + extraStructuredConfig = with lib.kernel; { + OSNOISE_TRACER = yes; + HWLAT_TRACER = yes; + }; + }; } diff --git a/configuration.nix b/configuration.nix index 4a20d1d..4e1e341 100644 --- a/configuration.nix +++ b/configuration.nix @@ -8,8 +8,10 @@ ./fs.nix ./gitlab-runner.nix ./net.nix + ./slurm.nix ./ssh.nix ./users.nix + ./overlays.nix ]; systemd.services."serial-getty@ttyS0" = { @@ -19,13 +21,17 @@ }; time.timeZone = "Europe/Madrid"; - i18n.defaultLocale = "en_US.UTF-8"; + i18n.defaultLocale = "en_DK.UTF-8"; environment.systemPackages = with pkgs; [ - vim wget git htop + vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option ]; nix.settings.experimental-features = [ "nix-command" "flakes" ]; + nix.gc.automatic = true; + + programs.zsh.enable = true; + programs.zsh.histSize = 100000; # Copy the NixOS configuration file and link it from the resulting system # (/run/current-system/configuration.nix). 
This is useful in case you diff --git a/fs.nix b/fs.nix index 7a459dd..7d8e36d 100644 --- a/fs.nix +++ b/fs.nix @@ -5,6 +5,12 @@ fileSystems."/home" = { device = "10.0.40.30:/home"; fsType = "nfs"; - options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" ]; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; + + # Tracing + fileSystems."/sys/kernel/tracing" = { + device = "none"; + fsType = "tracefs"; }; } diff --git a/gitlab-runner.nix b/gitlab-runner.nix index 67c400d..ae1f1a3 100644 --- a/gitlab-runner.nix +++ b/gitlab-runner.nix @@ -18,14 +18,21 @@ SHELL = "${pkgs.bash}/bin/bash"; }; }; - # # runner for everything else - # default = { - # # File should contain at least these two variables: - # # `CI_SERVER_URL` - # # `REGISTRATION_TOKEN` - # registrationConfigFile = "/run/secrets/gitlab-runner-registration"; - # dockerImage = "debian:stable"; - # }; + + # runner for everything else + default = { + # File should contain at least these two variables: + # `CI_SERVER_URL` + # `REGISTRATION_TOKEN` + registrationConfigFile = "/run/secrets/gitlab-runner-registration"; + dockerImage = "debian:stable"; + tagList = [ "docker" "xeon" ]; + registrationFlags = [ "--docker-network-mode host" ]; + environmentVariables = { + https_proxy = "http://localhost:23080"; + http_proxy = "http://localhost:23080"; + }; + }; }; }; @@ -33,6 +40,22 @@ systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner"; + #systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce + # ''${pkgs.gitlab-runner}/bin/gitlab-runner --debug run --config ''${HOME}/.gitlab-runner/config.toml --working-directory ''${HOME}''; + + # TODO https://docs.gitlab.com/runner/configuration/proxy.html + #systemd.services.docker.environment = { + # HTTP_PROXY="http://localhost:23080/"; + # HTTPS_PROXY="http://localhost:23080/"; + #}; 
+ + #virtualisation.docker.daemon.settings = { + # proxies.default = { + # httpProxy = "http://localhost:23080/"; + # httpsProxy = "http://localhost:23080/"; + # noProxy = "localhost,127.0.0.0/8"; + # }; + #}; users.users.gitlab-runner = { uid = config.ids.uids.gitlab-runner; @@ -40,6 +63,7 @@ home = "/var/lib/gitlab-runner"; description = "Gitlab Runner"; group = "gitlab-runner"; + extraGroups = [ "docker" ]; createHome = true; }; users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; diff --git a/net.nix b/net.nix index a9ae546..23547b8 100644 --- a/net.nix +++ b/net.nix @@ -4,8 +4,9 @@ networking = { hostName = "xeon07"; + enableIPv6 = false; useDHCP = false; - defaultGateway = "10.0.40.30"; + #defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; interfaces.eno1.useDHCP = false; interfaces.eno1.ipv4.addresses = [ { @@ -20,7 +21,70 @@ firewall = { enable = true; - allowedTCPPorts = [ 22 80 443 ]; + allowedTCPPorts = [ 22 ]; + + # FIXME: For slurmd as it requests the compute nodes to connect to us + allowedTCPPortRanges = [ { from=1024; to=65535; } ]; }; + + extraHosts = '' + 10.0.40.30 ssfhead + 84.88.53.236 ssfhead.bsc.es ssfhead + + # Node Entry for node: mds01 (ID=72) + 10.0.40.40 mds01 mds01-eth0 + 10.0.42.40 mds01-ib0 + 10.0.40.141 mds01-ipmi0 + + # Node Entry for node: oss01 (ID=73) + 10.0.40.41 oss01 oss01-eth0 + 10.0.42.41 oss01-ib0 + 10.0.40.142 oss01-ipmi0 + + # Node Entry for node: oss02 (ID=74) + 10.0.40.42 oss02 oss02-eth0 + 10.0.42.42 oss02-ib0 + 10.0.40.143 oss02-ipmi0 + + # Node Entry for node: xeon01 (ID=15) + 10.0.40.1 xeon01 xeon01-eth0 + 10.0.42.1 xeon01-ib0 + 10.0.40.101 xeon01-ipmi0 + + # Node Entry for node: xeon02 (ID=16) + 10.0.40.2 xeon02 xeon02-eth0 + 10.0.42.2 xeon02-ib0 + 10.0.40.102 xeon02-ipmi0 + + # Node Entry for node: xeon03 (ID=17) + 10.0.40.3 xeon03 xeon03-eth0 + 10.0.42.3 xeon03-ib0 + 10.0.40.103 xeon03-ipmi0 + + # Node Entry for node: xeon04 (ID=18) + 10.0.40.4 xeon04 xeon04-eth0 + 10.0.42.4 xeon04-ib0 + 
10.0.40.104 xeon04-ipmi0 + + # Node Entry for node: xeon05 (ID=19) + 10.0.40.5 xeon05 xeon05-eth0 + 10.0.42.5 xeon05-ib0 + 10.0.40.105 xeon05-ipmi0 + + # Node Entry for node: xeon06 (ID=20) + 10.0.40.6 xeon06 xeon06-eth0 + 10.0.42.6 xeon06-ib0 + 10.0.40.106 xeon06-ipmi0 + + # Node Entry for node: xeon07 (ID=21) + 10.0.40.7 xeon07 xeon07-eth0 + 10.0.42.7 xeon07-ib0 + 10.0.40.107 xeon07-ipmi0 + + # Node Entry for node: xeon08 (ID=22) + 10.0.40.8 xeon08 xeon08-eth0 + 10.0.42.8 xeon08-ib0 + 10.0.40.108 xeon08-ipmi0 + ''; }; } diff --git a/overlays-compat/overlays.nix b/overlays-compat/overlays.nix new file mode 100644 index 0000000..8a606c5 --- /dev/null +++ b/overlays-compat/overlays.nix @@ -0,0 +1,8 @@ +self: super: +with super.lib; +let + # Load the system config and get the `nixpkgs.overlays` option + overlays = (import <nixpkgs/nixos> { }).config.nixpkgs.overlays; +in + # Apply all overlays to the input of the current "main" overlay + foldl' (flip extends) (_: super) overlays self diff --git a/overlays.nix b/overlays.nix new file mode 100644 index 0000000..38fe262 --- /dev/null +++ b/overlays.nix @@ -0,0 +1,25 @@ +{ options, ... }: + +let + + bscpkgsSrc = builtins.fetchTarball "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/slurm-xeon07/bscpkgs-master.tar.gz"; + bscpkgs = import "${bscpkgsSrc}/overlay.nix"; + + xeon07Overlay = (self: super: { + slurm = super.bsc.slurm-16-05-8-1; + }); + +in + +{ + nix.nixPath = + # Prepend default nixPath values. + options.nix.nixPath.default ++ + # Append our nixpkgs-overlays. + [ "nixpkgs-overlays=/etc/nixos/overlays-compat/" ] + ; + + nixpkgs.overlays = [ + bscpkgs xeon07Overlay + ]; +} diff --git a/slurm.nix b/slurm.nix new file mode 100644 index 0000000..379681d --- /dev/null +++ b/slurm.nix @@ -0,0 +1,12 @@ +{ ... 
}: + +{ + services.slurm = { + client.enable = true; + controlMachine = "ssfhead"; + clusterName = "owl"; + nodeName = [ + "xeon[01-08] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + ]; + }; +} diff --git a/users.nix b/users.nix index 617ff5e..03a5eb1 100644 --- a/users.nix +++ b/users.nix @@ -8,8 +8,13 @@ isNormalUser = true; home = "/home/Computational/rarias"; description = "Rodrigo Arias"; + group = "Computational"; extraGroups = [ "wheel" ]; hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; }; + + groups = { + Computational = { gid = 564; }; + }; }; } -- 2.49.0 From ccee2339a33656eebe360550c748c4a8eb7750d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 5 Apr 2023 16:56:05 +0200 Subject: [PATCH 003/472] Add mio key --- ssh.nix | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ssh.nix b/ssh.nix index 3c97d4c..fa043bf 100644 --- a/ssh.nix +++ b/ssh.nix @@ -13,8 +13,14 @@ # Authorize keys users.users = { - root.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" ]; - rarias.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" ]; + root.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + ]; + rarias.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + ]; }; programs.ssh.knownHosts = { -- 2.49.0 From d0dfba5c03e4171943cfc91e63c83728d3af3ef7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 5 Apr 2023 16:56:27 +0200 Subject: [PATCH 004/472] Remove commented docker 
settings --- gitlab-runner.nix | 8 -------- 1 file changed, 8 deletions(-) diff --git a/gitlab-runner.nix b/gitlab-runner.nix index ae1f1a3..5aec124 100644 --- a/gitlab-runner.nix +++ b/gitlab-runner.nix @@ -49,14 +49,6 @@ # HTTPS_PROXY="http://localhost:23080/"; #}; - #virtualisation.docker.daemon.settings = { - # proxies.default = { - # httpProxy = "http://localhost:23080/"; - # httpsProxy = "http://localhost:23080/"; - # noProxy = "localhost,127.0.0.0/8"; - # }; - #}; - users.users.gitlab-runner = { uid = config.ids.uids.gitlab-runner; #isNormalUser = true; -- 2.49.0 From e6c35604bb6183fda8d7add49a885a3c09922a7e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 5 Apr 2023 16:59:09 +0200 Subject: [PATCH 005/472] Add some tools and use relaxed for build sandbox --- configuration.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configuration.nix b/configuration.nix index 4e1e341..e00c8df 100644 --- a/configuration.nix +++ b/configuration.nix @@ -8,10 +8,10 @@ ./fs.nix ./gitlab-runner.nix ./net.nix + ./overlays.nix ./slurm.nix ./ssh.nix ./users.nix - ./overlays.nix ]; systemd.services."serial-getty@ttyS0" = { @@ -25,9 +25,11 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option + ipmitool freeipmi ethtool lm_sensors ]; nix.settings.experimental-features = [ "nix-command" "flakes" ]; + nix.settings.sandbox = "relaxed"; nix.gc.automatic = true; programs.zsh.enable = true; -- 2.49.0 From 60ff89b7ccdf29051521104fe1f1095c896c1e59 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 5 Apr 2023 17:00:01 +0200 Subject: [PATCH 006/472] Add monitoring services --- configuration.nix | 1 + monitoring.nix | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 monitoring.nix diff --git a/configuration.nix b/configuration.nix index e00c8df..e1b9ce5 100644 --- a/configuration.nix +++ b/configuration.nix @@ -7,6 +7,7 @@ ./boot.nix ./fs.nix 
./gitlab-runner.nix + ./monitoring.nix ./net.nix ./overlays.nix ./slurm.nix diff --git a/monitoring.nix b/monitoring.nix new file mode 100644 index 0000000..67a8615 --- /dev/null +++ b/monitoring.nix @@ -0,0 +1,50 @@ +{ config, lib, ... }: + +{ + services.grafana = { + enable = true; + settings.server = { + http_port = 2342; + http_addr = "127.0.0.1"; + }; + }; + + services.prometheus = { + enable = true; + port = 9001; + }; + + systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false; + systemd.services.prometheus-ipmi-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + + virtualisation.docker.daemon.settings = { + metrics-addr = "127.0.0.1:9323"; + }; + + services.prometheus = { + + exporters = { + ipmi.enable = true; + ipmi.group = "root"; + ipmi.user = "root"; + node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + }; + }; + + scrapeConfigs = [ + { + job_name = "xeon07"; + static_configs = [{ + targets = [ + "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" + "127.0.0.1:${toString config.services.prometheus.exporters.ipmi.port}" + "127.0.0.1:9323" + ]; + }]; + } + ]; + }; +} -- 2.49.0 From 5d8b4e96b24a59dd0a6bee0de9fb11dc1a6510b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 5 Apr 2023 17:04:42 +0200 Subject: [PATCH 007/472] Add agenix tool --- configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configuration.nix b/configuration.nix index e1b9ce5..ae67c8e 100644 --- a/configuration.nix +++ b/configuration.nix @@ -13,6 +13,8 @@ ./slurm.nix ./ssh.nix ./users.nix + + <agenix/modules/age.nix> ]; systemd.services."serial-getty@ttyS0" = { @@ -27,6 +29,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option ipmitool freeipmi ethtool lm_sensors + (pkgs.callPackage <agenix/pkgs/agenix.nix> {}) ]; nix.settings.experimental-features = [ "nix-command" "flakes" ]; -- 2.49.0 From a813ea6561337aa2265ffaebe72cbdb754ff16d9 Mon Sep 17 00:00:00 2001 
From: Rodrigo Arias Date: Thu, 6 Apr 2023 13:56:52 +0200 Subject: [PATCH 008/472] Enable gitlab runner monitoring --- gitlab-runner.nix | 4 ++-- monitoring.nix | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gitlab-runner.nix b/gitlab-runner.nix index 5aec124..a0f45a1 100644 --- a/gitlab-runner.nix +++ b/gitlab-runner.nix @@ -40,8 +40,8 @@ systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner"; - #systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce - # ''${pkgs.gitlab-runner}/bin/gitlab-runner --debug run --config ''${HOME}/.gitlab-runner/config.toml --working-directory ''${HOME}''; + systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce + ''${pkgs.gitlab-runner}/bin/gitlab-runner --debug run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; # TODO https://docs.gitlab.com/runner/configuration/proxy.html #systemd.services.docker.environment = { diff --git a/monitoring.nix b/monitoring.nix index 67a8615..c2644eb 100644 --- a/monitoring.nix +++ b/monitoring.nix @@ -42,6 +42,7 @@ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" "127.0.0.1:${toString config.services.prometheus.exporters.ipmi.port}" "127.0.0.1:9323" + "127.0.0.1:9252" ]; }]; } -- 2.49.0 From 8fe301203c82a436ae45c27d93011539e6e4798e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Thu, 6 Apr 2023 13:57:32 +0200 Subject: [PATCH 009/472] Export nix store over nfs --- configuration.nix | 1 + nfs.nix | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 nfs.nix diff --git a/configuration.nix b/configuration.nix index ae67c8e..9a51528 100644 --- a/configuration.nix +++ b/configuration.nix @@ -9,6 +9,7 @@ ./gitlab-runner.nix ./monitoring.nix ./net.nix + ./nfs.nix ./overlays.nix ./slurm.nix ./ssh.nix diff 
--git a/nfs.nix b/nfs.nix new file mode 100644 index 0000000..affb304 --- /dev/null +++ b/nfs.nix @@ -0,0 +1,9 @@ +{ ... }: + +{ + services.nfs.server.enable = true; + services.nfs.server.exports = '' + /nix 10.0.40.0/24(ro,sync,no_subtree_check,root_squash) + ''; + networking.firewall.allowedTCPPorts = [ 2049 ]; +} -- 2.49.0 From 72f965943047536a5efff270683eb3945df8d264 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Thu, 6 Apr 2023 13:58:24 +0200 Subject: [PATCH 010/472] Enable IPoIB and set the infiniband IP --- net.nix | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net.nix b/net.nix index 23547b8..550423e 100644 --- a/net.nix +++ b/net.nix @@ -1,6 +1,10 @@ -{ ... }: +{ pkgs, ... }: { + # Infiniband (IPoIB) + environment.systemPackages = [ pkgs.rdma-core ]; + boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; + networking = { hostName = "xeon07"; @@ -14,6 +18,11 @@ prefixLength = 24; } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.7"; + prefixLength = 24; + } ]; + proxy = { default = "http://localhost:23080/"; noProxy = "127.0.0.1,localhost,internal.domain"; -- 2.49.0 From 40b9beb86b9417505c56d7f1a949abea81dc0680 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Thu, 6 Apr 2023 13:58:55 +0200 Subject: [PATCH 011/472] Disable ethernet specific useDHCP Is already configured by default for all interfaces. 
--- net.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/net.nix b/net.nix index 550423e..c9e5ccc 100644 --- a/net.nix +++ b/net.nix @@ -12,7 +12,6 @@ useDHCP = false; #defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; - interfaces.eno1.useDHCP = false; interfaces.eno1.ipv4.addresses = [ { address = "10.0.40.7"; prefixLength = 24; -- 2.49.0 From 9310a7b0b9dba45c9b1eb36ff2fea44afc38b625 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 11 Apr 2023 12:47:52 +0200 Subject: [PATCH 012/472] Add gitlab-runner secrets using agenix --- gitlab-runner.nix | 27 ++++++--------------------- secrets.nix | 8 ++++++++ secrets/ovni-token.age | 11 +++++++++++ 3 files changed, 25 insertions(+), 21 deletions(-) create mode 100644 secrets.nix create mode 100644 secrets/ovni-token.age diff --git a/gitlab-runner.nix b/gitlab-runner.nix index a0f45a1..0f9c3ba 100644 --- a/gitlab-runner.nix +++ b/gitlab-runner.nix @@ -1,30 +1,21 @@ { pkgs, lib, config, ... }: { + age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age; + services.gitlab-runner = { enable = true; services = { - # runner for executing stuff on host system (very insecure!) - # make sure to add required packages (including git!) 
- # to `environment.systemPackages` - shell = { - # File should contain at least these two variables: - # `CI_SERVER_URL` - # `REGISTRATION_TOKEN` - registrationConfigFile = "/run/secrets/gitlab-runner-registration"; + ovni-shell = { + registrationConfigFile = config.age.secrets."secrets/ovni-token".path; executor = "shell"; tagList = [ "nix" "xeon" ]; environmentVariables = { SHELL = "${pkgs.bash}/bin/bash"; }; }; - - # runner for everything else - default = { - # File should contain at least these two variables: - # `CI_SERVER_URL` - # `REGISTRATION_TOKEN` - registrationConfigFile = "/run/secrets/gitlab-runner-registration"; + ovni-docker = { + registrationConfigFile = config.age.secrets."secrets/ovni-token".path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; registrationFlags = [ "--docker-network-mode host" ]; @@ -43,12 +34,6 @@ systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce ''${pkgs.gitlab-runner}/bin/gitlab-runner --debug run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; - # TODO https://docs.gitlab.com/runner/configuration/proxy.html - #systemd.services.docker.environment = { - # HTTP_PROXY="http://localhost:23080/"; - # HTTPS_PROXY="http://localhost:23080/"; - #}; - users.users.gitlab-runner = { uid = config.ids.uids.gitlab-runner; #isNormalUser = true; diff --git a/secrets.nix b/secrets.nix new file mode 100644 index 0000000..425f8c8 --- /dev/null +++ b/secrets.nix @@ -0,0 +1,8 @@ +let + root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb"; + system = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; + systems = [ root system ]; +in +{ + "secrets/ovni-token.age".publicKeys = systems; +} diff --git a/secrets/ovni-token.age b/secrets/ovni-token.age new file mode 100644 index 0000000..8241b6d --- /dev/null +++ b/secrets/ovni-token.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> 
ssh-ed25519 MSF3dg Ivlduky3TjzCthY9RB/Jb0+MouX2FYW06hoNdQ+f818 +NKnuQrTQBXjTArXG6/5KV5cdg/9JUk/l3vVdYq0fXOE +-> ssh-ed25519 HY2yRg 1ZCKpZ7sXNPgllHoozCgyW8NqK2TCoyCYZdug6YeJkM +BEeThDkjfaK9S5a81HcyaZv9zobKANVMEimduc/IO54 +-> &eB%}y-grease o;.XY Yirz }Xh\DG +CkLRClqWRkCr7n8o5UV9+kdCik2iTG/dI1s666CKcgxbAPohmryJzOKdgRLyzCf0 +CSPMUfrixmuQtuShigtmY6Pm2A +--- GEuNMnWZ3+B6QNXv7s7bfJdJ2bJAAW+jbfHQZ0UQB+k +3.-ӮƿD{\%R0߷|PFxs_Px`4,z35Ldrj2^ +]h4~AP‹e3fTEl*8z.x207 \ No newline at end of file -- 2.49.0 From 1b5e227095bf5c15ac6c79a592d59517e879b477 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 11 Apr 2023 12:58:24 +0200 Subject: [PATCH 013/472] Disable debug from gitlab runner --- gitlab-runner.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitlab-runner.nix b/gitlab-runner.nix index 0f9c3ba..a63fbe7 100644 --- a/gitlab-runner.nix +++ b/gitlab-runner.nix @@ -32,7 +32,7 @@ systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce - ''${pkgs.gitlab-runner}/bin/gitlab-runner --debug run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; + ''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; users.users.gitlab-runner = { uid = config.ids.uids.gitlab-runner; -- 2.49.0 From 0ca649b715884dbc69177a549cb0b46884f4c65b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 11 Apr 2023 12:59:21 +0200 Subject: [PATCH 014/472] Add nos-v gitlab runner --- gitlab-runner.nix | 11 +++++++++++ secrets.nix | 1 + secrets/nosv-token.age | Bin 0 -> 501 bytes 3 files changed, 12 insertions(+) create mode 100644 secrets/nosv-token.age diff --git a/gitlab-runner.nix b/gitlab-runner.nix index a63fbe7..ecb5118 100644 --- a/gitlab-runner.nix +++ 
b/gitlab-runner.nix @@ -2,6 +2,7 @@ { age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age; + age.secrets."secrets/nosv-token".file = ./secrets/nosv-token.age; services.gitlab-runner = { enable = true; @@ -24,6 +25,16 @@ http_proxy = "http://localhost:23080"; }; }; + nosv-docker = { + registrationConfigFile = config.age.secrets."secrets/nosv-token".path; + dockerImage = "debian:stable"; + tagList = [ "docker" "xeon" ]; + registrationFlags = [ "--docker-network-mode host" ]; + environmentVariables = { + https_proxy = "http://localhost:23080"; + http_proxy = "http://localhost:23080"; + }; + }; }; }; diff --git a/secrets.nix b/secrets.nix index 425f8c8..7c5aea7 100644 --- a/secrets.nix +++ b/secrets.nix @@ -5,4 +5,5 @@ let in { "secrets/ovni-token.age".publicKeys = systems; + "secrets/nosv-token.age".publicKeys = systems; } diff --git a/secrets/nosv-token.age b/secrets/nosv-token.age new file mode 100644 index 0000000000000000000000000000000000000000..b26d482ab5e0522be3058a61ea400bb213e6c327 GIT binary patch literal 501 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCUl4R$k5NmuYR)7OqH zFE2E%tn$fnDGf48iHs;qcPdQH&dx4K@lOm44h&0;@~Skd$mTN532^rh^f1cvHa1W5 z%}GiN@ia=zat!pS3NA`_%nppqbBd_)O7r*2h(x!|BhsicC|#jI-_)n5+}toUFsGz2 z%GtRrDyyi(Da1I{E8NsE&)c-bEW*q=&(GK_Ba|z}peozPGAPV4!p|opEZH%|-_zAC zBsIf3B&a0NB&W*LAXq!7v`9bSEE!~5s$X(Su)DEtdQoa(ajJr9X{J(SrJ)N~R7F;D zluvQGfxfwGx@W4kQ<`B!NpOCVaYSNLR90T9X}*`Gd4^@8TUJF>xtDg8mtkU1L4H!W zqi=epXH{A*7niQCu7YtzK%|e0dwQ;~cYbcJqiIM*uA^^xx?i}DMW8{LWp<% Date: Tue, 11 Apr 2023 20:36:54 +0200 Subject: [PATCH 015/472] Set EDITOR and add nix-diff --- configuration.nix | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configuration.nix b/configuration.nix index 9a51528..e7fe0ec 100644 --- a/configuration.nix +++ b/configuration.nix @@ -29,10 +29,15 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - ipmitool freeipmi ethtool lm_sensors + nix-diff 
ipmitool freeipmi ethtool lm_sensors (pkgs.callPackage {}) ]; + environment.variables = { + EDITOR = "vim"; + VISUAL = "vim"; + }; + nix.settings.experimental-features = [ "nix-command" "flakes" ]; nix.settings.sandbox = "relaxed"; nix.gc.automatic = true; -- 2.49.0 From b5153009ea0345fec345b45d858b69e649b1e2ba Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 11 Apr 2023 21:21:22 +0200 Subject: [PATCH 016/472] Run the garbage collector once a week --- configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/configuration.nix b/configuration.nix index e7fe0ec..b16b6e6 100644 --- a/configuration.nix +++ b/configuration.nix @@ -41,6 +41,7 @@ nix.settings.experimental-features = [ "nix-command" "flakes" ]; nix.settings.sandbox = "relaxed"; nix.gc.automatic = true; + nix.gc.dates = "weekly"; programs.zsh.enable = true; programs.zsh.histSize = 100000; -- 2.49.0 From 59b8ba0e768cfbbacdf432cbcb79a72f39f4a7a8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 11 Apr 2023 21:22:00 +0200 Subject: [PATCH 017/472] Use bscpkgs master --- overlays.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/overlays.nix b/overlays.nix index 38fe262..30cc381 100644 --- a/overlays.nix +++ b/overlays.nix @@ -2,7 +2,7 @@ let - bscpkgsSrc = builtins.fetchTarball "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/slurm-xeon07/bscpkgs-master.tar.gz"; + bscpkgsSrc = builtins.fetchTarball "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/master/bscpkgs-master.tar.gz"; bscpkgs = import "${bscpkgsSrc}/overlay.nix"; xeon07Overlay = (self: super: { -- 2.49.0 From 40d0a1673681683ca5b9a04c4de6a7035cd889e5 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Fri, 14 Apr 2023 10:14:17 +0200 Subject: [PATCH 018/472] Allow wheel users to build derivations --- configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/configuration.nix b/configuration.nix index b16b6e6..11916ca 100644 --- a/configuration.nix +++ b/configuration.nix @@ -40,6 +40,7 @@ 
nix.settings.experimental-features = [ "nix-command" "flakes" ]; nix.settings.sandbox = "relaxed"; + nix.settings.trusted-users = [ "@wheel" ]; nix.gc.automatic = true; nix.gc.dates = "weekly"; -- 2.49.0 From 0f7a0c3ac22886e14d7a854614eb5d2db11cbb51 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 16:03:46 +0200 Subject: [PATCH 019/472] Add smartctl monitoring --- monitoring.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/monitoring.nix b/monitoring.nix index c2644eb..3190d06 100644 --- a/monitoring.nix +++ b/monitoring.nix @@ -21,6 +21,13 @@ metrics-addr = "127.0.0.1:9323"; }; + # Required to allow the smartctl exporter to read the nvme0 character device, + # see the commit message on: + # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80 + services.udev.extraRules = '' + SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk" + ''; + services.prometheus = { exporters = { @@ -32,6 +39,7 @@ enabledCollectors = [ "systemd" ]; port = 9002; }; + smartctl.enable = true; }; scrapeConfigs = [ @@ -43,6 +51,7 @@ "127.0.0.1:${toString config.services.prometheus.exporters.ipmi.port}" "127.0.0.1:9323" "127.0.0.1:9252" + "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" ]; }]; } -- 2.49.0 From 848efdcb2d80ee1a9bf9aeb0ad9a96383de82cd6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 16:09:23 +0200 Subject: [PATCH 020/472] Move xeon07 configuration to a directory --- boot.nix => xeon07/boot.nix | 0 configuration.nix => xeon07/configuration.nix | 0 fs.nix => xeon07/fs.nix | 0 gitlab-runner.nix => xeon07/gitlab-runner.nix | 0 .../hardware-configuration.nix | 0 monitoring.nix => xeon07/monitoring.nix | 0 net.nix => xeon07/net.nix | 0 nfs.nix => xeon07/nfs.nix | 0 overlays.nix => xeon07/overlays.nix | 0 secrets.nix => xeon07/secrets.nix | 0 {secrets => xeon07/secrets}/nosv-token.age | Bin {secrets => xeon07/secrets}/ovni-token.age | 0 slurm.nix => xeon07/slurm.nix | 0 ssh.nix 
=> xeon07/ssh.nix | 0 users.nix => xeon07/users.nix | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename boot.nix => xeon07/boot.nix (100%) rename configuration.nix => xeon07/configuration.nix (100%) rename fs.nix => xeon07/fs.nix (100%) rename gitlab-runner.nix => xeon07/gitlab-runner.nix (100%) rename hardware-configuration.nix => xeon07/hardware-configuration.nix (100%) rename monitoring.nix => xeon07/monitoring.nix (100%) rename net.nix => xeon07/net.nix (100%) rename nfs.nix => xeon07/nfs.nix (100%) rename overlays.nix => xeon07/overlays.nix (100%) rename secrets.nix => xeon07/secrets.nix (100%) rename {secrets => xeon07/secrets}/nosv-token.age (100%) rename {secrets => xeon07/secrets}/ovni-token.age (100%) rename slurm.nix => xeon07/slurm.nix (100%) rename ssh.nix => xeon07/ssh.nix (100%) rename users.nix => xeon07/users.nix (100%) diff --git a/boot.nix b/xeon07/boot.nix similarity index 100% rename from boot.nix rename to xeon07/boot.nix diff --git a/configuration.nix b/xeon07/configuration.nix similarity index 100% rename from configuration.nix rename to xeon07/configuration.nix diff --git a/fs.nix b/xeon07/fs.nix similarity index 100% rename from fs.nix rename to xeon07/fs.nix diff --git a/gitlab-runner.nix b/xeon07/gitlab-runner.nix similarity index 100% rename from gitlab-runner.nix rename to xeon07/gitlab-runner.nix diff --git a/hardware-configuration.nix b/xeon07/hardware-configuration.nix similarity index 100% rename from hardware-configuration.nix rename to xeon07/hardware-configuration.nix diff --git a/monitoring.nix b/xeon07/monitoring.nix similarity index 100% rename from monitoring.nix rename to xeon07/monitoring.nix diff --git a/net.nix b/xeon07/net.nix similarity index 100% rename from net.nix rename to xeon07/net.nix diff --git a/nfs.nix b/xeon07/nfs.nix similarity index 100% rename from nfs.nix rename to xeon07/nfs.nix diff --git a/overlays.nix b/xeon07/overlays.nix similarity index 100% rename from overlays.nix rename to 
xeon07/overlays.nix diff --git a/secrets.nix b/xeon07/secrets.nix similarity index 100% rename from secrets.nix rename to xeon07/secrets.nix diff --git a/secrets/nosv-token.age b/xeon07/secrets/nosv-token.age similarity index 100% rename from secrets/nosv-token.age rename to xeon07/secrets/nosv-token.age diff --git a/secrets/ovni-token.age b/xeon07/secrets/ovni-token.age similarity index 100% rename from secrets/ovni-token.age rename to xeon07/secrets/ovni-token.age diff --git a/slurm.nix b/xeon07/slurm.nix similarity index 100% rename from slurm.nix rename to xeon07/slurm.nix diff --git a/ssh.nix b/xeon07/ssh.nix similarity index 100% rename from ssh.nix rename to xeon07/ssh.nix diff --git a/users.nix b/xeon07/users.nix similarity index 100% rename from users.nix rename to xeon07/users.nix -- 2.49.0 From f43d5492944ed30bc04b7ca130d7a460ec3c9abb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:27:08 +0200 Subject: [PATCH 021/472] Add common directory --- common/main.nix | 6 ++++++ xeon07/configuration.nix | 1 + 2 files changed, 7 insertions(+) create mode 100644 common/main.nix diff --git a/common/main.nix b/common/main.nix new file mode 100644 index 0000000..7a0ec5c --- /dev/null +++ b/common/main.nix @@ -0,0 +1,6 @@ +{ config, pkgs, ... 
}: + +{ + imports = [ + ]; +} diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 11916ca..282c5dd 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -2,6 +2,7 @@ { imports = [ + ../common/main.nix ./hardware-configuration.nix ./boot.nix -- 2.49.0 From b60e821eaa34595aa0fec2a94bb9233e17fefaa6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:28:37 +0200 Subject: [PATCH 022/472] Move disk selection to configuration.nix --- xeon07/boot.nix | 3 --- xeon07/configuration.nix | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xeon07/boot.nix b/xeon07/boot.nix index 56a2a31..bd8f5a4 100644 --- a/xeon07/boot.nix +++ b/xeon07/boot.nix @@ -5,9 +5,6 @@ boot.loader.grub.enable = true; boot.loader.grub.version = 2; - # Select the this using the ID to avoid mismatches - boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; - # Enable GRUB2 serial console boot.loader.grub.extraConfig = '' serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1 diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 282c5dd..a2fc393 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -19,6 +19,9 @@ ]; + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + systemd.services."serial-getty@ttyS0" = { enable = true; wantedBy = [ "getty.target" ]; -- 2.49.0 From 733eb93f23c84656a6939685c9bade116619f38c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:30:02 +0200 Subject: [PATCH 023/472] Move boot.nix to common --- {xeon07 => common}/boot.nix | 0 common/main.nix | 1 + xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => common}/boot.nix (100%) diff --git a/xeon07/boot.nix b/common/boot.nix similarity index 100% rename from xeon07/boot.nix rename to common/boot.nix diff --git a/common/main.nix 
b/common/main.nix index 7a0ec5c..a5308a3 100644 --- a/common/main.nix +++ b/common/main.nix @@ -2,5 +2,6 @@ { imports = [ + ./boot.nix ]; } diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index a2fc393..d0827e4 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -5,7 +5,6 @@ ../common/main.nix ./hardware-configuration.nix - ./boot.nix ./fs.nix ./gitlab-runner.nix ./monitoring.nix -- 2.49.0 From a7fb69ab923ddc25e4265144a5851809630a8833 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:31:35 +0200 Subject: [PATCH 024/472] Move fs.nix to common --- {xeon07 => common}/fs.nix | 0 common/main.nix | 1 + xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => common}/fs.nix (100%) diff --git a/xeon07/fs.nix b/common/fs.nix similarity index 100% rename from xeon07/fs.nix rename to common/fs.nix diff --git a/common/main.nix b/common/main.nix index a5308a3..7cc9fac 100644 --- a/common/main.nix +++ b/common/main.nix @@ -3,5 +3,6 @@ { imports = [ ./boot.nix + ./fs.nix ]; } diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index d0827e4..e9e55af 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -5,7 +5,6 @@ ../common/main.nix ./hardware-configuration.nix - ./fs.nix ./gitlab-runner.nix ./monitoring.nix ./net.nix -- 2.49.0 From 035becd018215c38414923a016662e7d61cc9cde Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:34:27 +0200 Subject: [PATCH 025/472] Use partition labels for / and swap --- xeon07/hardware-configuration.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xeon07/hardware-configuration.nix b/xeon07/hardware-configuration.nix index c69f868..b14b375 100644 --- a/xeon07/hardware-configuration.nix +++ b/xeon07/hardware-configuration.nix @@ -14,12 +14,12 @@ boot.extraModulePackages = [ ]; fileSystems."/" = - { device = "/dev/disk/by-uuid/289f78d9-b339-47de-b321-0a6796b9a79b"; + { device = 
"/dev/disk/by-label/nixos"; fsType = "ext4"; }; swapDevices = - [ { device = "/dev/disk/by-uuid/2bac02f9-7ea1-4868-9536-23710f19baca"; } + [ { device = "/dev/disk/by-label/swap"; } ]; # Enables DHCP on each ethernet and wireless interface. In case of scripted networking -- 2.49.0 From f5b4580dae8109ece058684b56db6fc997f9fc20 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:35:58 +0200 Subject: [PATCH 026/472] Move filesystems config to common/fs.nix --- common/fs.nix | 9 +++++++++ xeon07/hardware-configuration.nix | 18 ------------------ 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/common/fs.nix b/common/fs.nix index 7d8e36d..60fccef 100644 --- a/common/fs.nix +++ b/common/fs.nix @@ -1,6 +1,15 @@ { ... }: { + fileSystems."/" = + { device = "/dev/disk/by-label/nixos"; + fsType = "ext4"; + }; + + swapDevices = + [ { device = "/dev/disk/by-label/swap"; } + ]; + # Mount the home via NFS fileSystems."/home" = { device = "10.0.40.30:/home"; diff --git a/xeon07/hardware-configuration.nix b/xeon07/hardware-configuration.nix index b14b375..7c7a591 100644 --- a/xeon07/hardware-configuration.nix +++ b/xeon07/hardware-configuration.nix @@ -13,24 +13,6 @@ boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; - fileSystems."/" = - { device = "/dev/disk/by-label/nixos"; - fsType = "ext4"; - }; - - swapDevices = - [ { device = "/dev/disk/by-label/swap"; } - ]; - - # Enables DHCP on each ethernet and wireless interface. In case of scripted networking - # (the default) this is the recommended approach. When using systemd-networkd it's - # still possible to use this option, but it's recommended to use it in conjunction - # with explicit per-interface declarations with `networking.interfaces..useDHCP`. 
- networking.useDHCP = lib.mkDefault true; - # networking.interfaces.eth0.useDHCP = lib.mkDefault true; - # networking.interfaces.eth1.useDHCP = lib.mkDefault true; - # networking.interfaces.ib0.useDHCP = lib.mkDefault true; - nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; powerManagement.cpuFreqGovernor = lib.mkDefault "powersave"; hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; -- 2.49.0 From 6978677cb5bd76e6713b533a12498b1b237ba22b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:37:01 +0200 Subject: [PATCH 027/472] Move boot config to common/boot.nix --- common/boot.nix | 5 +++++ xeon07/hardware-configuration.nix | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/common/boot.nix b/common/boot.nix index bd8f5a4..cf85951 100644 --- a/common/boot.nix +++ b/common/boot.nix @@ -26,4 +26,9 @@ HWLAT_TRACER = yes; }; }; + + boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ]; + boot.initrd.kernelModules = [ ]; + boot.kernelModules = [ "kvm-intel" ]; + boot.extraModulePackages = [ ]; } diff --git a/xeon07/hardware-configuration.nix b/xeon07/hardware-configuration.nix index 7c7a591..7e4112c 100644 --- a/xeon07/hardware-configuration.nix +++ b/xeon07/hardware-configuration.nix @@ -8,11 +8,6 @@ [ (modulesPath + "/installer/scan/not-detected.nix") ]; - boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ]; - boot.initrd.kernelModules = [ ]; - boot.kernelModules = [ "kvm-intel" ]; - boot.extraModulePackages = [ ]; - nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; powerManagement.cpuFreqGovernor = lib.mkDefault "powersave"; hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; -- 2.49.0 From 7e6c395ff822ce2525aeccd1d631f7070ae7f198 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:38:08 +0200 Subject: [PATCH 028/472] Move the 
remaining hw config to common --- xeon07/hardware-configuration.nix => common/hw.nix | 0 common/main.nix | 1 + xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename xeon07/hardware-configuration.nix => common/hw.nix (100%) diff --git a/xeon07/hardware-configuration.nix b/common/hw.nix similarity index 100% rename from xeon07/hardware-configuration.nix rename to common/hw.nix diff --git a/common/main.nix b/common/main.nix index 7cc9fac..3a766b4 100644 --- a/common/main.nix +++ b/common/main.nix @@ -4,5 +4,6 @@ imports = [ ./boot.nix ./fs.nix + ./hw.nix ]; } diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index e9e55af..4c9d09f 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -3,7 +3,6 @@ { imports = [ ../common/main.nix - ./hardware-configuration.nix ./gitlab-runner.nix ./monitoring.nix -- 2.49.0 From c580254dde4bb038d05f050cd16a85fdc430d466 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:43:23 +0200 Subject: [PATCH 029/472] Move common options from configuration.nix --- common/main.nix | 41 ++++++++++++++++++++++++++++++++++++++++ xeon07/configuration.nix | 38 ------------------------------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/common/main.nix b/common/main.nix index 3a766b4..690ae75 100644 --- a/common/main.nix +++ b/common/main.nix @@ -6,4 +6,45 @@ ./fs.nix ./hw.nix ]; + + environment.systemPackages = with pkgs; [ + vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option + nix-diff ipmitool freeipmi ethtool lm_sensors + ]; + + systemd.services."serial-getty@ttyS0" = { + enable = true; + wantedBy = [ "getty.target" ]; + serviceConfig.Restart = "always"; + }; + + time.timeZone = "Europe/Madrid"; + i18n.defaultLocale = "en_DK.UTF-8"; + + environment.variables = { + EDITOR = "vim"; + VISUAL = "vim"; + }; + + nix.settings.experimental-features = [ "nix-command" "flakes" ]; + nix.settings.sandbox = "relaxed"; + 
nix.settings.trusted-users = [ "@wheel" ]; + nix.gc.automatic = true; + nix.gc.dates = "weekly"; + + programs.zsh.enable = true; + programs.zsh.histSize = 100000; + + # Copy the NixOS configuration file and link it from the resulting system + # (/run/current-system/configuration.nix). This is useful in case you + # accidentally delete configuration.nix. + system.copySystemConfiguration = true; + + # This value determines the NixOS release from which the default + # settings for stateful data, like file locations and database versions + # on your system were taken. It‘s perfectly fine and recommended to leave + # this value at the release version of the first install of this system. + # Before changing this value read the documentation for this option + # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html). + system.stateVersion = "22.11"; # Did you read the comment? } diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 4c9d09f..3406db5 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -19,45 +19,7 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; - systemd.services."serial-getty@ttyS0" = { - enable = true; - wantedBy = [ "getty.target" ]; - serviceConfig.Restart = "always"; - }; - - time.timeZone = "Europe/Madrid"; - i18n.defaultLocale = "en_DK.UTF-8"; - environment.systemPackages = with pkgs; [ - vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors (pkgs.callPackage {}) ]; - - environment.variables = { - EDITOR = "vim"; - VISUAL = "vim"; - }; - - nix.settings.experimental-features = [ "nix-command" "flakes" ]; - nix.settings.sandbox = "relaxed"; - nix.settings.trusted-users = [ "@wheel" ]; - nix.gc.automatic = true; - nix.gc.dates = "weekly"; - - programs.zsh.enable = true; - programs.zsh.histSize = 100000; - - # Copy the NixOS configuration file 
and link it from the resulting system - # (/run/current-system/configuration.nix). This is useful in case you - # accidentally delete configuration.nix. - system.copySystemConfiguration = true; - - # This value determines the NixOS release from which the default - # settings for stateful data, like file locations and database versions - # on your system were taken. It‘s perfectly fine and recommended to leave - # this value at the release version of the first install of this system. - # Before changing this value read the documentation for this option - # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html). - system.stateVersion = "22.11"; # Did you read the comment? } -- 2.49.0 From f7b18098b126a72e97f2a31bc9f81fd0e6264f78 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:45:10 +0200 Subject: [PATCH 030/472] Move users.nix to common --- common/main.nix | 1 + {xeon07 => common}/users.nix | 0 xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => common}/users.nix (100%) diff --git a/common/main.nix b/common/main.nix index 690ae75..84217f8 100644 --- a/common/main.nix +++ b/common/main.nix @@ -5,6 +5,7 @@ ./boot.nix ./fs.nix ./hw.nix + ./users.nix ]; environment.systemPackages = with pkgs; [ diff --git a/xeon07/users.nix b/common/users.nix similarity index 100% rename from xeon07/users.nix rename to common/users.nix diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 3406db5..2236f06 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -11,7 +11,6 @@ ./overlays.nix ./slurm.nix ./ssh.nix - ./users.nix ]; -- 2.49.0 From 480dd95d9b47520d7ae64c862f09709724e185c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:46:01 +0200 Subject: [PATCH 031/472] Move overlays.nix to common --- common/main.nix | 1 + {xeon07 => common}/overlays.nix | 0 xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => 
common}/overlays.nix (100%) diff --git a/common/main.nix b/common/main.nix index 84217f8..fb1b4a4 100644 --- a/common/main.nix +++ b/common/main.nix @@ -5,6 +5,7 @@ ./boot.nix ./fs.nix ./hw.nix + ./overlays.nix ./users.nix ]; diff --git a/xeon07/overlays.nix b/common/overlays.nix similarity index 100% rename from xeon07/overlays.nix rename to common/overlays.nix diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 2236f06..45367d0 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -8,7 +8,6 @@ ./monitoring.nix ./net.nix ./nfs.nix - ./overlays.nix ./slurm.nix ./ssh.nix -- 2.49.0 From ed158ee87f60e6e32c992c2b0e55eb0d34e14646 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:46:53 +0200 Subject: [PATCH 032/472] Move ssh.nix to common --- common/main.nix | 1 + {xeon07 => common}/ssh.nix | 0 xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => common}/ssh.nix (100%) diff --git a/common/main.nix b/common/main.nix index fb1b4a4..abe1426 100644 --- a/common/main.nix +++ b/common/main.nix @@ -6,6 +6,7 @@ ./fs.nix ./hw.nix ./overlays.nix + ./ssh.nix ./users.nix ]; diff --git a/xeon07/ssh.nix b/common/ssh.nix similarity index 100% rename from xeon07/ssh.nix rename to common/ssh.nix diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 45367d0..0715162 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -9,7 +9,6 @@ ./net.nix ./nfs.nix ./slurm.nix - ./ssh.nix ]; -- 2.49.0 From 9630b23ce20dac8047b9253cbd04d9765f752e44 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:49:54 +0200 Subject: [PATCH 033/472] Remove host specific network options from net.nix --- xeon07/configuration.nix | 12 ++++++++++++ xeon07/net.nix | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 0715162..faafe0b 100644 --- a/xeon07/configuration.nix +++ 
b/xeon07/configuration.nix @@ -16,6 +16,18 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + networking = { + hostName = "xeon07"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.7"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.7"; + prefixLength = 24; + } ]; + }; + environment.systemPackages = with pkgs; [ (pkgs.callPackage {}) ]; diff --git a/xeon07/net.nix b/xeon07/net.nix index c9e5ccc..6e5d5bf 100644 --- a/xeon07/net.nix +++ b/xeon07/net.nix @@ -6,22 +6,10 @@ boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; networking = { - hostName = "xeon07"; - enableIPv6 = false; useDHCP = false; #defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; - interfaces.eno1.ipv4.addresses = [ { - address = "10.0.40.7"; - prefixLength = 24; - } ]; - - interfaces.ibp5s0.ipv4.addresses = [ { - address = "10.0.42.7"; - prefixLength = 24; - } ]; - proxy = { default = "http://localhost:23080/"; noProxy = "127.0.0.1,localhost,internal.domain"; -- 2.49.0 From a94765e8ae5c90b13142f19a141072732bd4ad03 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:50:44 +0200 Subject: [PATCH 034/472] Move net.nix to common --- common/main.nix | 1 + {xeon07 => common}/net.nix | 0 xeon07/configuration.nix | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {xeon07 => common}/net.nix (100%) diff --git a/common/main.nix b/common/main.nix index abe1426..ef16e0c 100644 --- a/common/main.nix +++ b/common/main.nix @@ -5,6 +5,7 @@ ./boot.nix ./fs.nix ./hw.nix + ./net.nix ./overlays.nix ./ssh.nix ./users.nix diff --git a/xeon07/net.nix b/common/net.nix similarity index 100% rename from xeon07/net.nix rename to common/net.nix diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index faafe0b..df63508 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -6,7 +6,6 @@ ./gitlab-runner.nix ./monitoring.nix - 
./net.nix ./nfs.nix ./slurm.nix -- 2.49.0 From 1009736d812821244d82897dd4c2f4f1135aec70 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:55:07 +0200 Subject: [PATCH 035/472] Load overlays from /config --- common/overlays.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/overlays.nix b/common/overlays.nix index 30cc381..23fd4c6 100644 --- a/common/overlays.nix +++ b/common/overlays.nix @@ -16,7 +16,7 @@ in # Prepend default nixPath values. options.nix.nixPath.default ++ # Append our nixpkgs-overlays. - [ "nixpkgs-overlays=/etc/nixos/overlays-compat/" ] + [ "nixpkgs-overlays=/config/overlays-compat/" ] ; nixpkgs.overlays = [ -- 2.49.0 From a5449067a76a5923dd9c12201ed73531070ba9ff Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Tue, 18 Apr 2023 18:56:31 +0200 Subject: [PATCH 036/472] Add configuration for xeon01 --- xeon01/configuration.nix | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 xeon01/configuration.nix diff --git a/xeon01/configuration.nix b/xeon01/configuration.nix new file mode 100644 index 0000000..3bf8bba --- /dev/null +++ b/xeon01/configuration.nix @@ -0,0 +1,22 @@ +{ config, pkgs, ... 
}: + +{ + imports = [ + ../common/main.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; + + networking = { + hostName = "xeon01"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.1"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.1"; + prefixLength = 24; + } ]; + }; +} -- 2.49.0 From 6cb079a44e29e3eacc77c4ac364c5bd2d78a4113 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 26 Apr 2023 14:09:23 +0200 Subject: [PATCH 037/472] Add script to rebuild configuration --- rebuild.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100755 rebuild.sh diff --git a/rebuild.sh b/rebuild.sh new file mode 100755 index 0000000..592ed38 --- /dev/null +++ b/rebuild.sh @@ -0,0 +1,16 @@ +#!/bin/sh -e + +if [ "$(id -u)" != 0 ]; then + echo "Needs root permissions" + exit 1 +fi + +host=$(hostname) +conf="$(readlink -f .)/${host}/configuration.nix" + +if [ ! 
-e "$conf" ]; then + echo "Missing config $conf" + exit 1 +fi + +NIXOS_CONFIG="${conf}" nixos-rebuild switch -- 2.49.0 From 0120be66fb44a878bb1e57b760a5f7b487897463 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 26 Apr 2023 13:28:04 +0200 Subject: [PATCH 038/472] Remove xeon07 overlay to load upstream slurm --- common/overlays.nix | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/common/overlays.nix b/common/overlays.nix index 23fd4c6..2fb80d7 100644 --- a/common/overlays.nix +++ b/common/overlays.nix @@ -5,10 +5,6 @@ let bscpkgsSrc = builtins.fetchTarball "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/master/bscpkgs-master.tar.gz"; bscpkgs = import "${bscpkgsSrc}/overlay.nix"; - xeon07Overlay = (self: super: { - slurm = super.bsc.slurm-16-05-8-1; - }); - in { @@ -20,6 +16,6 @@ in ; nixpkgs.overlays = [ - bscpkgs xeon07Overlay + bscpkgs ]; } -- 2.49.0 From e1dcad50d06fae982048237e416fa79341178599 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 26 Apr 2023 13:29:28 +0200 Subject: [PATCH 039/472] Use xeon07 as control machine --- xeon07/slurm.nix | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/xeon07/slurm.nix b/xeon07/slurm.nix index 379681d..48a77d7 100644 --- a/xeon07/slurm.nix +++ b/xeon07/slurm.nix @@ -3,10 +3,15 @@ { services.slurm = { client.enable = true; - controlMachine = "ssfhead"; + server.enable = true; + controlMachine = "xeon07"; clusterName = "owl"; nodeName = [ - "xeon[01-08] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + ]; + + partitionName = [ + "xeon Nodes=xeon[01-02,07] Default=YES MaxTime=INFINITE State=UP" ]; }; } -- 2.49.0 From de4ac8cbd6b5aaf5301e9e1d1f552d0382be56bd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 26 Apr 2023 13:35:06 +0200 Subject: [PATCH 040/472] Enable slurm in xeon01 --- xeon01/configuration.nix | 1 + xeon01/slurm.nix | 12 ++++++++++++ 2 files changed, 13 
insertions(+) create mode 100644 xeon01/slurm.nix diff --git a/xeon01/configuration.nix b/xeon01/configuration.nix index 3bf8bba..9f651d8 100644 --- a/xeon01/configuration.nix +++ b/xeon01/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + ./slurm.nix ]; # Select the this using the ID to avoid mismatches diff --git a/xeon01/slurm.nix b/xeon01/slurm.nix new file mode 100644 index 0000000..815c94a --- /dev/null +++ b/xeon01/slurm.nix @@ -0,0 +1,12 @@ +{ ... }: + +{ + services.slurm = { + client.enable = true; + controlMachine = "xeon07"; + clusterName = "owl"; + nodeName = [ + "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + ]; + }; +} -- 2.49.0 From de7cae620892242df0d2eda51da00de57a5974a8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 26 Apr 2023 14:26:39 +0200 Subject: [PATCH 041/472] Test flakes --- flake.nix | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 flake.nix diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..0a90a63 --- /dev/null +++ b/flake.nix @@ -0,0 +1,16 @@ +{ + inputs.nixpkgs.url = "github:NixOS/nixpkgs/22.11"; + + outputs = { nixpkgs, ... 
}: { + nixosConfigurations = { + xeon01 = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./xeon01/configuration.nix ]; + }; + xeon07 = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./xeon07/configuration.nix ]; + }; + }; + }; +} -- 2.49.0 From 69bb2128dbb3fb624871278d3989767de885cd67 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 26 Apr 2023 17:36:36 +0200 Subject: [PATCH 042/472] Lock flakes and add inputs --- common/main.nix | 4 +- common/overlays.nix | 14 +------ flake.lock | 87 ++++++++++++++++++++++++++++++++++++++++ flake.nix | 50 +++++++++++++++++++++-- rebuild.sh | 15 ++++--- xeon01/slurm.nix | 2 +- xeon07/configuration.nix | 6 --- 7 files changed, 146 insertions(+), 32 deletions(-) create mode 100644 flake.lock diff --git a/common/main.nix b/common/main.nix index ef16e0c..ad54622 100644 --- a/common/main.nix +++ b/common/main.nix @@ -6,7 +6,6 @@ ./fs.nix ./hw.nix ./net.nix - ./overlays.nix ./ssh.nix ./users.nix ]; @@ -38,11 +37,12 @@ programs.zsh.enable = true; programs.zsh.histSize = 100000; + users.defaultUserShell = pkgs.zsh; # Copy the NixOS configuration file and link it from the resulting system # (/run/current-system/configuration.nix). This is useful in case you # accidentally delete configuration.nix. - system.copySystemConfiguration = true; + #system.copySystemConfiguration = true; # This value determines the NixOS release from which the default # settings for stateful data, like file locations and database versions diff --git a/common/overlays.nix b/common/overlays.nix index 2fb80d7..fdf0706 100644 --- a/common/overlays.nix +++ b/common/overlays.nix @@ -1,21 +1,9 @@ { options, ... }: -let - - bscpkgsSrc = builtins.fetchTarball "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/master/bscpkgs-master.tar.gz"; - bscpkgs = import "${bscpkgsSrc}/overlay.nix"; - -in - { nix.nixPath = # Prepend default nixPath values. options.nix.nixPath.default ++ # Append our nixpkgs-overlays. 
- [ "nixpkgs-overlays=/config/overlays-compat/" ] - ; - - nixpkgs.overlays = [ - bscpkgs - ]; + [ "nixpkgs-overlays=${../overlays-compat}" ]; } diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..c8d3383 --- /dev/null +++ b/flake.lock @@ -0,0 +1,87 @@ +{ + "nodes": { + "agenix": { + "inputs": { + "darwin": "darwin", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1682101079, + "narHash": "sha256-MdAhtjrLKnk2uiqun1FWABbKpLH090oeqCSiWemtuck=", + "owner": "ryantm", + "repo": "agenix", + "rev": "2994d002dcff5353ca1ac48ec584c7f6589fe447", + "type": "github" + }, + "original": { + "owner": "ryantm", + "repo": "agenix", + "type": "github" + } + }, + "bscpkgs": { + "locked": { + "lastModified": 1682521628, + "narHash": "sha256-uRIDCuJNt3rdikWiRcM3VPsQSk0vpQB1JO3Wx24psJo=", + "ref": "refs/heads/master", + "rev": "c775ee4d6f76aded05b08ae13924c302f18f9b2c", + "revCount": 807, + "type": "git", + "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" + }, + "original": { + "type": "git", + "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" + } + }, + "darwin": { + "inputs": { + "nixpkgs": [ + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1673295039, + "narHash": "sha256-AsdYgE8/GPwcelGgrntlijMg4t3hLFJFCRF3tL5WVjA=", + "owner": "lnl7", + "repo": "nix-darwin", + "rev": "87b9d090ad39b25b2400029c64825fc2a8868943", + "type": "github" + }, + "original": { + "owner": "lnl7", + "ref": "master", + "repo": "nix-darwin", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1669833724, + "narHash": "sha256-mlqo1r+TZUOuypWdrZHluxWL+E5WzXlUXNZ9Y0WLDFU=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9a6aabc4740790ef3bbb246b86d029ccf6759658", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "22.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "agenix": "agenix", + "bscpkgs": "bscpkgs", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} 
diff --git a/flake.nix b/flake.nix index 0a90a63..70cdb8b 100644 --- a/flake.nix +++ b/flake.nix @@ -1,15 +1,57 @@ { - inputs.nixpkgs.url = "github:NixOS/nixpkgs/22.11"; + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/22.11"; + agenix.url = "github:ryantm/agenix"; + agenix.inputs.nixpkgs.follows = "nixpkgs"; + bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; + }; - outputs = { nixpkgs, ... }: { + outputs = { self, nixpkgs, agenix, bscpkgs, ... }: { nixosConfigurations = { xeon01 = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./xeon01/configuration.nix ]; + modules = [ + ( {options, ...}: { + # Sel the nixos-config path to the one of the current flake + nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + nix.nixPath = [ + "nixpkgs=${nixpkgs}" + "bscpkgs=${bscpkgs}" + "nixos-config=${self.outPath}/xeon01/configuration.nix" + "nixpkgs-overlays=${self.outPath}/overlays-compat" + ]; + nix.registry.nixpkgs.flake = nixpkgs; + nix.registry.bscpkgs.flake = bscpkgs; + system.configurationRevision = + if self ? rev + then self.rev + else throw ("Refusing to build from a dirty Git tree!"); + }) + ./xeon01/configuration.nix + ]; }; xeon07 = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./xeon07/configuration.nix ]; + modules = [ + ( {options, ...}: { + # Sel the nixos-config path to the one of the current flake + nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + nix.nixPath = [ + "nixpkgs=${nixpkgs}" + "bscpkgs=${bscpkgs}" + "nixos-config=${self.outPath}/xeon07/configuration.nix" + "nixpkgs-overlays=${self.outPath}/overlays-compat" + ]; + nix.registry.nixpkgs.flake = nixpkgs; + nix.registry.bscpkgs.flake = bscpkgs; + system.configurationRevision = + if self ? 
rev + then self.rev + else throw ("Refusing to build from a dirty Git tree!"); + }) + agenix.nixosModules.default + ./xeon07/configuration.nix + ]; }; }; }; diff --git a/rebuild.sh b/rebuild.sh index 592ed38..5df9616 100755 --- a/rebuild.sh +++ b/rebuild.sh @@ -6,11 +6,14 @@ if [ "$(id -u)" != 0 ]; then fi host=$(hostname) -conf="$(readlink -f .)/${host}/configuration.nix" -if [ ! -e "$conf" ]; then - echo "Missing config $conf" - exit 1 -fi +#conf="$(readlink -f .)/${host}/configuration.nix" +# +#if [ ! -e "$conf" ]; then +# echo "Missing config $conf" +# exit 1 +#fi +# +#NIXOS_CONFIG="${conf}" nixos-rebuild switch -NIXOS_CONFIG="${conf}" nixos-rebuild switch +nixos-rebuild switch --flake . diff --git a/xeon01/slurm.nix b/xeon01/slurm.nix index 815c94a..672bfc5 100644 --- a/xeon01/slurm.nix +++ b/xeon01/slurm.nix @@ -6,7 +6,7 @@ controlMachine = "xeon07"; clusterName = "owl"; nodeName = [ - "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" ]; }; } diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index df63508..4cb68cc 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -8,8 +8,6 @@ ./monitoring.nix ./nfs.nix ./slurm.nix - - ]; # Select the this using the ID to avoid mismatches @@ -26,8 +24,4 @@ prefixLength = 24; } ]; }; - - environment.systemPackages = with pkgs; [ - (pkgs.callPackage {}) - ]; } -- 2.49.0 From 5dbbb27c4391616c666a5200de41c71995f2ddea Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Apr 2023 16:27:04 +0200 Subject: [PATCH 043/472] Refacto slurm configuration into compute/control --- common/main.nix | 1 + {xeon01 => common}/slurm.nix | 0 xeon01/configuration.nix | 5 +---- xeon07/configuration.nix | 2 +- xeon07/slurm-daemon.nix | 10 ++++++++++ xeon07/slurm.nix | 17 ----------------- 6 files changed, 13 insertions(+), 22 deletions(-) rename {xeon01 => common}/slurm.nix (100%) create mode 100644 
xeon07/slurm-daemon.nix delete mode 100644 xeon07/slurm.nix diff --git a/common/main.nix b/common/main.nix index ad54622..8a76649 100644 --- a/common/main.nix +++ b/common/main.nix @@ -6,6 +6,7 @@ ./fs.nix ./hw.nix ./net.nix + ./slurm.nix ./ssh.nix ./users.nix ]; diff --git a/xeon01/slurm.nix b/common/slurm.nix similarity index 100% rename from xeon01/slurm.nix rename to common/slurm.nix diff --git a/xeon01/configuration.nix b/xeon01/configuration.nix index 9f651d8..1daecec 100644 --- a/xeon01/configuration.nix +++ b/xeon01/configuration.nix @@ -1,10 +1,7 @@ { config, pkgs, ... }: { - imports = [ - ../common/main.nix - ./slurm.nix - ]; + imports = [ ../common/main.nix ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; diff --git a/xeon07/configuration.nix b/xeon07/configuration.nix index 4cb68cc..f3b6ab4 100644 --- a/xeon07/configuration.nix +++ b/xeon07/configuration.nix @@ -7,7 +7,7 @@ ./gitlab-runner.nix ./monitoring.nix ./nfs.nix - ./slurm.nix + ./slurm-daemon.nix ]; # Select the this using the ID to avoid mismatches diff --git a/xeon07/slurm-daemon.nix b/xeon07/slurm-daemon.nix new file mode 100644 index 0000000..a8dd3b8 --- /dev/null +++ b/xeon07/slurm-daemon.nix @@ -0,0 +1,10 @@ +{ ... }: + +{ + services.slurm = { + server.enable = true; + partitionName = [ + "xeon Nodes=xeon[01-02,07] Default=YES MaxTime=INFINITE State=UP" + ]; + }; +} diff --git a/xeon07/slurm.nix b/xeon07/slurm.nix deleted file mode 100644 index 48a77d7..0000000 --- a/xeon07/slurm.nix +++ /dev/null @@ -1,17 +0,0 @@ -{ ... 
}: - -{ - services.slurm = { - client.enable = true; - server.enable = true; - controlMachine = "xeon07"; - clusterName = "owl"; - nodeName = [ - "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" - ]; - - partitionName = [ - "xeon Nodes=xeon[01-02,07] Default=YES MaxTime=INFINITE State=UP" - ]; - }; -} -- 2.49.0 From a211e9ebee00eb36bf4721b2cc23cc26ebd6ce17 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Apr 2023 16:28:12 +0200 Subject: [PATCH 044/472] Add xeon02 configuration --- flake.nix | 22 ++++++++++++++++++++++ xeon02/configuration.nix | 20 ++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 xeon02/configuration.nix diff --git a/flake.nix b/flake.nix index 70cdb8b..a4c037d 100644 --- a/flake.nix +++ b/flake.nix @@ -30,6 +30,28 @@ ./xeon01/configuration.nix ]; }; + xeon02 = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ( {options, ...}: { + # Sel the nixos-config path to the one of the current flake + nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + nix.nixPath = [ + "nixpkgs=${nixpkgs}" + "bscpkgs=${bscpkgs}" + "nixos-config=${self.outPath}/xeon02/configuration.nix" + "nixpkgs-overlays=${self.outPath}/overlays-compat" + ]; + nix.registry.nixpkgs.flake = nixpkgs; + nix.registry.bscpkgs.flake = bscpkgs; + system.configurationRevision = + if self ? rev + then self.rev + else throw ("Refusing to build from a dirty Git tree!"); + }) + ./xeon02/configuration.nix + ]; + }; xeon07 = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; modules = [ diff --git a/xeon02/configuration.nix b/xeon02/configuration.nix new file mode 100644 index 0000000..5d097dd --- /dev/null +++ b/xeon02/configuration.nix @@ -0,0 +1,20 @@ +{ config, pkgs, ... 
}: + +{ + imports = [ ../common/main.nix ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; + + networking = { + hostName = "xeon02"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.2"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.2"; + prefixLength = 24; + } ]; + }; +} -- 2.49.0 From f12ba9f8b08e24b66c671522c547593e9078cdce Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Apr 2023 16:36:15 +0200 Subject: [PATCH 045/472] Add minimal netboot module to build kexec image --- common/boot.nix | 2 +- xeon02/configuration.nix | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/common/boot.nix b/common/boot.nix index cf85951..1ff695f 100644 --- a/common/boot.nix +++ b/common/boot.nix @@ -2,7 +2,7 @@ { # Use the GRUB 2 boot loader. - boot.loader.grub.enable = true; + boot.loader.grub.enable = lib.mkForce true; boot.loader.grub.version = 2; # Enable GRUB2 serial console diff --git a/xeon02/configuration.nix b/xeon02/configuration.nix index 5d097dd..83d4114 100644 --- a/xeon02/configuration.nix +++ b/xeon02/configuration.nix @@ -1,10 +1,12 @@ -{ config, pkgs, ... }: +{ config, pkgs, modulesPath, lib, ... 
}: { - imports = [ ../common/main.nix ]; + imports = [ ../common/main.nix (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; + #programs.ssh.forwardX11 = false; + programs.ssh.setXAuthLocation = lib.mkForce true; networking = { hostName = "xeon02"; -- 2.49.0 From c0b23ad450ceda1695857e55799eb80003f2a869 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Apr 2023 16:36:48 +0200 Subject: [PATCH 046/472] Add steps in install documentation --- doc/install.md | 62 ++++++++++++++++++++++++++++++++++++++++ xeon02/configuration.nix | 7 +++-- 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 doc/install.md diff --git a/doc/install.md b/doc/install.md new file mode 100644 index 0000000..05a6022 --- /dev/null +++ b/doc/install.md @@ -0,0 +1,62 @@ +# Installing NixOS in a new node + +This article shows the steps to install NixOS in a node following the +configuration of the repo. + +## Prepare the disk + +Create a main partition and label it `nixos` following [the manual][1]. + +[1]: https://nixos.org/manual/nixos/stable/index.html#sec-installation-manual-partitioning. + +``` +# disk=/dev/sdX +# parted $disk -- mklabel msdos +# parted $disk -- mkpart primary 1MB 100% +# parted $disk -- set 1 boot on +``` + +Then create an etx4 filesystem, labeled `nixos` where the system will be +installed. **Ensure that no other partition has the same label.** + +``` +# mkfs.ext4 -L nixos "${disk}1" +# mount ${disk}1 /mnt +# lsblk -f $disk +NAME FSTYPE LABEL UUID MOUNTPOINT +sdX +`-sdX1 ext4 nixos 10d73b75-809c-4fa3-b99d-4fab2f0d0d8e /mnt +``` + +## Prepare nix and nixos-install + +Mount the nix store from the xeon07 node in read-only /nix. 
+ +``` +# mkdir /nix +# mount -o ro xeon07:/nix /nix +``` + +Get the nix binary and nixos-install tool from xeon07: + +``` +# ssh xeon07 'readlink -f $(which nix)' +/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin/nix +# ssh xeon07 'readlink -f $(which nixos-install)' +/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/nixos-install +``` + +And add them to the PATH: + +``` +# export PATH=$PATH:/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin +# export PATH=$PATH:/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/ +# nix --version +nix (Nix) 2.13.3 +``` + +## Build the nixos kexec image + +``` +# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v +``` diff --git a/xeon02/configuration.nix b/xeon02/configuration.nix index 83d4114..06d88ff 100644 --- a/xeon02/configuration.nix +++ b/xeon02/configuration.nix @@ -1,12 +1,15 @@ { config, pkgs, modulesPath, lib, ... }: { - imports = [ ../common/main.nix (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; + imports = [ + #(modulesPath + "/installer/netboot/netboot-minimal.nix") + ../common/main.nix + ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; #programs.ssh.forwardX11 = false; - programs.ssh.setXAuthLocation = lib.mkForce true; + #programs.ssh.setXAuthLocation = lib.mkForce true; networking = { hostName = "xeon02"; -- 2.49.0 From ef2ffa61c3b34a41ae9de77d3f754d825b93e597 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Apr 2023 18:29:32 +0200 Subject: [PATCH 047/472] Update ib interface name in xeon02 It seems to be plugged in another PCI port --- xeon02/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xeon02/configuration.nix b/xeon02/configuration.nix index 06d88ff..1b06afd 100644 --- a/xeon02/configuration.nix +++ b/xeon02/configuration.nix @@ -17,7 +17,7 @@ address = "10.0.40.2"; prefixLength = 24; } ]; - 
interfaces.ibp5s0.ipv4.addresses = [ { + interfaces.ibp129s0.ipv4.addresses = [ { address = "10.0.42.2"; prefixLength = 24; } ]; -- 2.49.0 From 461d6d2f34dabc06618633c2fa27984e9180debc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 11:13:46 +0200 Subject: [PATCH 048/472] Update nixpkgs --- flake.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flake.lock b/flake.lock index c8d3383..3a78c3b 100644 --- a/flake.lock +++ b/flake.lock @@ -61,10 +61,10 @@ "nixpkgs": { "locked": { "lastModified": 1669833724, - "narHash": "sha256-mlqo1r+TZUOuypWdrZHluxWL+E5WzXlUXNZ9Y0WLDFU=", + "narHash": "sha256-/HEZNyGbnQecrgJnfE8d0WC5c1xuPSD2LUpB6YXlg4c=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9a6aabc4740790ef3bbb246b86d029ccf6759658", + "rev": "4d2b37a84fad1091b9de401eb450aae66f1a741e", "type": "github" }, "original": { -- 2.49.0 From 87c4521de34638f1d3b594559bc9137fb6f2899c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 11:18:37 +0200 Subject: [PATCH 049/472] Update nixpkgs to nixos-unstable --- flake.lock | 8 ++++---- flake.nix | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/flake.lock b/flake.lock index 3a78c3b..a8e8e79 100644 --- a/flake.lock +++ b/flake.lock @@ -60,16 +60,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1669833724, - "narHash": "sha256-/HEZNyGbnQecrgJnfE8d0WC5c1xuPSD2LUpB6YXlg4c=", + "lastModified": 1682526928, + "narHash": "sha256-2cKh4O6t1rQ8Ok+v16URynmb0rV7oZPEbXkU0owNLQs=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "4d2b37a84fad1091b9de401eb450aae66f1a741e", + "rev": "d6b863fd9b7bb962e6f9fdf292419a775e772891", "type": "github" }, "original": { "owner": "NixOS", - "ref": "22.11", + "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } diff --git a/flake.nix b/flake.nix index a4c037d..7c4b67d 100644 --- a/flake.nix +++ b/flake.nix @@ -1,6 +1,6 @@ { inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/22.11"; + nixpkgs.url = 
"github:NixOS/nixpkgs/nixos-unstable"; agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; -- 2.49.0 From 53f6dcec8dab8bd3a1f6938e1abde1a0903439ef Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 11:19:47 +0200 Subject: [PATCH 050/472] Disable osnoise and hwlat tracer for now Reuse nix cache to avoid rebuilding the kernel. --- common/boot.nix | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/common/boot.nix b/common/boot.nix index 1ff695f..a893e44 100644 --- a/common/boot.nix +++ b/common/boot.nix @@ -18,14 +18,14 @@ "console=ttyS0,115200" ]; - boot.kernelPatches = lib.singleton { - name = "osnoise-tracer"; - patch = null; - extraStructuredConfig = with lib.kernel; { - OSNOISE_TRACER = yes; - HWLAT_TRACER = yes; - }; - }; + #boot.kernelPatches = lib.singleton { + # name = "osnoise-tracer"; + # patch = null; + # extraStructuredConfig = with lib.kernel; { + # OSNOISE_TRACER = yes; + # HWLAT_TRACER = yes; + # }; + #}; boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ]; boot.initrd.kernelModules = [ ]; -- 2.49.0 From be69070f6104b050834a66d325c3a04d07dffe7b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 11:50:43 +0200 Subject: [PATCH 051/472] Use the latest kernel --- common/boot.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/boot.nix b/common/boot.nix index a893e44..ca18e0e 100644 --- a/common/boot.nix +++ b/common/boot.nix @@ -1,4 +1,4 @@ -{ lib, ... }: +{ lib, pkgs, ... }: { # Use the GRUB 2 boot loader. 
@@ -18,6 +18,8 @@ "console=ttyS0,115200" ]; + boot.kernelPackages = pkgs.linuxPackages_latest; + #boot.kernelPatches = lib.singleton { # name = "osnoise-tracer"; # patch = null; -- 2.49.0 From a5a0fd9b6f73ddda5fd156d2bcf433f087fa4c89 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 12:34:51 +0200 Subject: [PATCH 052/472] Increase locked memory to 1 GiB --- common/main.nix | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/common/main.nix b/common/main.nix index 8a76649..b0147f9 100644 --- a/common/main.nix +++ b/common/main.nix @@ -22,6 +22,16 @@ serviceConfig.Restart = "always"; }; + # Increase limits + security.pam.loginLimits = [ + { + domain = "*"; + type = "-"; + item = "memlock"; + value = "1048576"; # 1 GiB of mem locked + } + ]; + time.timeZone = "Europe/Madrid"; i18n.defaultLocale = "en_DK.UTF-8"; -- 2.49.0 From 9767238c7636dbd7e57819b5b35012b24ac0e527 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 17:07:48 +0200 Subject: [PATCH 053/472] Use pmix by default in slurm --- common/slurm.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/slurm.nix b/common/slurm.nix index 672bfc5..c1b09d6 100644 --- a/common/slurm.nix +++ b/common/slurm.nix @@ -8,5 +8,8 @@ nodeName = [ "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" ]; + extraConfig = '' + MpiDefault=pmix + ''; }; } -- 2.49.0 From e37f9e2b0fd967f6f1198dd2242a0c686c4e4168 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 17:59:19 +0200 Subject: [PATCH 054/472] Roll back to bash as default shell Zsh doesn't behave properly, it needs further configuration. 
--- common/main.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index b0147f9..0b76d22 100644 --- a/common/main.nix +++ b/common/main.nix @@ -48,7 +48,6 @@ programs.zsh.enable = true; programs.zsh.histSize = 100000; - users.defaultUserShell = pkgs.zsh; # Copy the NixOS configuration file and link it from the resulting system # (/run/current-system/configuration.nix). This is useful in case you -- 2.49.0 From 6d16772d07fd29d456fc81033aceaa9997898922 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Apr 2023 18:12:10 +0200 Subject: [PATCH 055/472] Simplify bash prompt --- common/main.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common/main.nix b/common/main.nix index 0b76d22..c7fc6ce 100644 --- a/common/main.nix +++ b/common/main.nix @@ -49,6 +49,10 @@ programs.zsh.enable = true; programs.zsh.histSize = 100000; + programs.bash.promptInit = '' + PS1="\h\\$ " + ''; + # Copy the NixOS configuration file and link it from the resulting system # (/run/current-system/configuration.nix). This is useful in case you # accidentally delete configuration.nix. 
-- 2.49.0 From 89049d0b1fbde912ea7af48897bed181ad90fff3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 2 May 2023 17:38:10 +0200 Subject: [PATCH 056/472] Allow 5 concurrent builds in the gitlab-runner --- xeon07/gitlab-runner.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/xeon07/gitlab-runner.nix b/xeon07/gitlab-runner.nix index ecb5118..81897ef 100644 --- a/xeon07/gitlab-runner.nix +++ b/xeon07/gitlab-runner.nix @@ -6,6 +6,7 @@ services.gitlab-runner = { enable = true; + settings.concurrent = 5; services = { ovni-shell = { registrationConfigFile = config.age.secrets."secrets/ovni-token".path; -- 2.49.0 From ebb5e94416babf60588914bb6a823a87a9ee6389 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 2 May 2023 17:47:57 +0200 Subject: [PATCH 057/472] Increase the number of CPUs to 56 for nOS-V docker --- xeon07/gitlab-runner.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xeon07/gitlab-runner.nix b/xeon07/gitlab-runner.nix index 81897ef..6255005 100644 --- a/xeon07/gitlab-runner.nix +++ b/xeon07/gitlab-runner.nix @@ -30,7 +30,10 @@ registrationConfigFile = config.age.secrets."secrets/nosv-token".path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; - registrationFlags = [ "--docker-network-mode host" ]; + registrationFlags = [ + "--docker-network-mode host" + "--docker-cpus 56" + ]; environmentVariables = { https_proxy = "http://localhost:23080"; http_proxy = "http://localhost:23080"; -- 2.49.0 From cdb0688ec1b373080f3f4f01b39b80ab0a465747 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 9 May 2023 18:37:38 +0200 Subject: [PATCH 058/472] Add hal ssh key --- common/ssh.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/common/ssh.nix b/common/ssh.nix index fa043bf..8d069d7 100644 --- a/common/ssh.nix +++ b/common/ssh.nix @@ -20,6 +20,7 @@ rarias.openssh.authorizedKeys.keys = [ "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" ]; }; -- 2.49.0 From 5e8ff50c98009d4994b26447eac909497753bbe4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 9 May 2023 18:53:31 +0200 Subject: [PATCH 059/472] Allow public dashboards in grafana --- xeon07/monitoring.nix | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xeon07/monitoring.nix b/xeon07/monitoring.nix index 3190d06..d6438d7 100644 --- a/xeon07/monitoring.nix +++ b/xeon07/monitoring.nix @@ -3,9 +3,12 @@ { services.grafana = { enable = true; - settings.server = { - http_port = 2342; - http_addr = "127.0.0.1"; + settings = { + server = { + http_port = 2342; + http_addr = "127.0.0.1"; + }; + feature_toggles.publicDashboards = true; }; }; -- 2.49.0 From 882161b21e703c6ad6b4e4bc0737210cf18d1698 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 May 2023 12:48:04 +0200 Subject: [PATCH 060/472] Automatically resume restarted nodes in SLURM --- common/slurm.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/common/slurm.nix b/common/slurm.nix index c1b09d6..e4890e3 100644 --- a/common/slurm.nix +++ b/common/slurm.nix @@ -10,6 +10,7 @@ ]; extraConfig = '' MpiDefault=pmix + ReturnToService=2 ''; }; } -- 2.49.0 From fd1b467a60ade1d7636c90d3fd8855fc058a1f66 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Mon, 8 May 2023 16:45:40 +0200 Subject: [PATCH 061/472] Add nixos-config.nix to easily enable nix repl --- nixos-config.nix | 1 + 1 file changed, 1 insertion(+) create mode 100644 nixos-config.nix diff --git a/nixos-config.nix b/nixos-config.nix new file mode 100644 index 0000000..2e36516 --- /dev/null +++ b/nixos-config.nix @@ -0,0 +1 @@ +(builtins.getFlake (toString ./.)).nixosConfigurations -- 2.49.0 From a5c7205481cced2dc999e79b6dbcd0122067e64f Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 5 May 2023 20:18:01 
+0200 Subject: [PATCH 062/472] Add xeon08 basic config --- common/ssh.nix | 5 ++++ flake.nix | 22 ++++++++++++++++++ xeon08/configuration.nix | 32 +++++++++++++++++++++++++ xeon08/fs.nix | 13 +++++++++++ xeon08/kernel/kernel.nix | 50 ++++++++++++++++++++++++++++++++++++++++ xeon08/slurm.nix | 7 ++++++ xeon08/users.nix | 23 ++++++++++++++++++ 7 files changed, 152 insertions(+) create mode 100644 xeon08/configuration.nix create mode 100644 xeon08/fs.nix create mode 100644 xeon08/kernel/kernel.nix create mode 100644 xeon08/slurm.nix create mode 100644 xeon08/users.nix diff --git a/common/ssh.nix b/common/ssh.nix index 8d069d7..d3676f7 100644 --- a/common/ssh.nix +++ b/common/ssh.nix @@ -16,12 +16,17 @@ root.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" ]; rarias.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" ]; + arocanon.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" + ]; }; programs.ssh.knownHosts = { diff --git a/flake.nix b/flake.nix index 7c4b67d..e07bff9 100644 --- a/flake.nix +++ b/flake.nix @@ -75,6 +75,28 @@ ./xeon07/configuration.nix ]; }; + xeon08 = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ( {options, ...}: { + # Sel the nixos-config path to the one of the current flake + nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + nix.nixPath = [ + 
"nixpkgs=${nixpkgs}" + "bscpkgs=${bscpkgs}" + "nixos-config=${self.outPath}/xeon08/configuration.nix" + "nixpkgs-overlays=${self.outPath}/overlays-compat" + ]; + nix.registry.nixpkgs.flake = nixpkgs; + nix.registry.bscpkgs.flake = bscpkgs; + system.configurationRevision = + if self ? rev + then self.rev + else throw ("Refusing to build from a dirty Git tree!"); + }) + ./xeon08/configuration.nix + ]; + }; }; }; } diff --git a/xeon08/configuration.nix b/xeon08/configuration.nix new file mode 100644 index 0000000..519c954 --- /dev/null +++ b/xeon08/configuration.nix @@ -0,0 +1,32 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/main.nix + #(modulesPath + "/installer/netboot/netboot-minimal.nix") + + ./kernel/kernel.nix + ./fs.nix + ./users.nix + ./slurm.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53564b"; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; + + # set up both ethernet and infiniband ips + networking = { + hostName = "xeon08"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.8"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.8"; + prefixLength = 24; + } ]; + }; +} diff --git a/xeon08/fs.nix b/xeon08/fs.nix new file mode 100644 index 0000000..1c1526a --- /dev/null +++ b/xeon08/fs.nix @@ -0,0 +1,13 @@ +{ ... }: + +{ + fileSystems."/nix" = { + device = "/dev/disk/by-label/optane"; + fsType = "ext4"; + neededForBoot = true; + }; + fileSystems."/mnt/data" = { + device = "/dev/disk/by-label/data"; + fsType = "ext4"; + }; +} diff --git a/xeon08/kernel/kernel.nix b/xeon08/kernel/kernel.nix new file mode 100644 index 0000000..5aca93c --- /dev/null +++ b/xeon08/kernel/kernel.nix @@ -0,0 +1,50 @@ +{ pkgs, lib, ... 
}: + +let + #fcs-devel = pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = /mnt/data/kernel/fcs/kernel/src; + # configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig; + #}; + + #fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false; + #fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false; + #fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true; + #fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true; + #fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = builtins.fetchGit { + # url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + # rev = gitCommit; + # ref = "fcs"; + # }; + # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; + #}; + + kernel = nixos-fcsv2; + + nixos-fcs-kernel = {gitCommit, lockStat ? false}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + version = "6.2.8"; + src = builtins.fetchGit { + url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + rev = gitCommit; + ref = "fcs"; + }; + structuredExtraConfig = with lib.kernel; { + # add cutom kernel options here + } // lib.optionalAttrs lockStat { + LOCK_STAT = yes; + }; + kernelPatches = []; + extraMeta.branch = lib.versions.majorMinor version; + }); + + nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; + nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; + nixos-fcsv1-lockstat = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; lockStat = true;}; + nixos-fcsv2-lockstat = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; lockStat = true;}; + latest = pkgs.linuxPackages_latest; + +in { + boot.kernelPackages = lib.mkForce kernel; +} diff --git a/xeon08/slurm.nix b/xeon08/slurm.nix new file mode 100644 index 0000000..0aa3bda --- /dev/null +++ b/xeon08/slurm.nix @@ -0,0 +1,7 @@ +{ lib, ... 
}: + +{ + services.slurm = { + client.enable = lib.mkForce false; + }; +} diff --git a/xeon08/users.nix b/xeon08/users.nix new file mode 100644 index 0000000..4a01344 --- /dev/null +++ b/xeon08/users.nix @@ -0,0 +1,23 @@ +{ ... }: + +{ + users = { + users.arocanon = { + uid = 1042; + isNormalUser = true; + home = "/home/Computational/arocanon"; + description = "Aleix Roca"; + group = "Computational"; + extraGroups = [ "wheel" ]; + hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; + }; + }; + + security.sudo.extraRules= [{ + users = [ "arocanon" ]; + commands = [{ + command = "ALL" ; + options= [ "NOPASSWD" ]; # "SETENV" # Adding the following could be a good idea + }]; + }]; +} -- 2.49.0 From 5b82a726473ba58bc7450e64a2c8ff89e88f9eab Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 11 May 2023 17:25:48 +0200 Subject: [PATCH 063/472] Set intel_pstate=passive and disable frequency boost --- xeon08/configuration.nix | 1 + xeon08/cpufreq.nix | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 xeon08/cpufreq.nix diff --git a/xeon08/configuration.nix b/xeon08/configuration.nix index 519c954..7899173 100644 --- a/xeon08/configuration.nix +++ b/xeon08/configuration.nix @@ -6,6 +6,7 @@ #(modulesPath + "/installer/netboot/netboot-minimal.nix") ./kernel/kernel.nix + ./cpufreq.nix ./fs.nix ./users.nix ./slurm.nix diff --git a/xeon08/cpufreq.nix b/xeon08/cpufreq.nix new file mode 100644 index 0000000..29498c4 --- /dev/null +++ b/xeon08/cpufreq.nix @@ -0,0 +1,40 @@ +{ lib, ... }: + +{ + # Disable frequency boost by default. Use the intel_pstate driver instead of + # acpi_cpufreq driver because the acpi_cpufreq driver does not read the + # complete range of P-States [1]. Use the intel_pstate passive mode [2] to + # disable HWP, which allows a core to "select P-states by itself". 
Also, this + # disables intel governors, which confusingly, have the same names as the + # generic ones but behave differently [3]. + + # Essentially, we use the generic governors, but use the intel driver to read + # the P-state list. + + # [1] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#intel-pstate-vs-acpi-cpufreq + # [2] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#passive-mode + # [3] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#active-mode + # https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html + + # set intel_pstate to passive mode + boot.kernelParams = [ + "intel_pstate=passive" + ]; + # Disable frequency boost + system.activationScripts = { + disableFrequencyBoost.text = '' + echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo + ''; + }; + + ## disable intel_pstate + #boot.kernelParams = [ + # "intel_pstate=disable" + #]; + ## Disable frequency boost + #system.activationScripts = { + # disableFrequencyBoost.text = '' + # echo 0 > /sys/devices/system/cpu/cpufreq/boost + # ''; + #}; +} -- 2.49.0 From 5ebb57deff7663ac82609d5d5b5da1befa3a0f13 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 10 May 2023 17:38:11 +0200 Subject: [PATCH 064/472] Add gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..17543c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.swp +/result -- 2.49.0 From b72d9936a2f396b59fca00b6b5f46ca95ad935df Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 10 May 2023 10:58:27 +0200 Subject: [PATCH 065/472] Improve documentation --- doc/install.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/doc/install.md b/doc/install.md index 05a6022..41e7684 100644 --- a/doc/install.md +++ b/doc/install.md @@ -12,7 +12,8 @@ Create a main partition and 
label it `nixos` following [the manual][1]. ``` # disk=/dev/sdX # parted $disk -- mklabel msdos -# parted $disk -- mkpart primary 1MB 100% +# parted $disk -- mkpart primary 1MB -8GB +# parted $disk -- mkpart primary linux-swap -8GB 100% # parted $disk -- set 1 boot on ``` @@ -21,6 +22,7 @@ installed. **Ensure that no other partition has the same label.** ``` # mkfs.ext4 -L nixos "${disk}1" +# mkswap -L swap "${disk}2" # mount ${disk}1 /mnt # lsblk -f $disk NAME FSTYPE LABEL UUID MOUNTPOINT @@ -55,6 +57,66 @@ And add them to the PATH: nix (Nix) 2.13.3 ``` +## Adapt owl configuration + +Clone owl repo: + +``` +$ git clone git@bscpm03.bsc.es:rarias/owl.git +$ cd owl +``` + +Edit the configuration to your needs. + +## Install from another Linux OS + +Install NixOS into the storage drive. + +``` +# nixos-install --root /mnt --flake .#xeon0X +``` + +At this point, the NixOS grub has been installed into the nixos device, which +is not the default boot device. To keep both the old Linux and NixOS grubs, add +an entry into the old Linux grub to jump into the new grub. + +``` +# echo " + +menuentry 'NixOS' { + insmod chain + search --no-floppy --label nixos --set root + configfile /boot/grub/grub.cfg +} " >> /etc/grub.d/40_custom +``` + +Rebuild grub config. + +``` +# grub2-mkconfig -o /boot/grub/grub.cfg +``` + +To boot into NixOS manually, reboot and select the NixOS entry in the grub +menu. + +To temporarily boot into NixOS only on the next reboot, run: + +``` +# grub2-reboot 'NixOS' +``` + +To permanently boot into NixOS as the default boot OS, edit `/etc/default/grub`: + +``` +GRUB_DEFAULT='NixOS' +``` + +And update grub. 
+ +``` +# grub2-mkconfig -o /boot/grub/grub.cfg +``` + ## Build the nixos kexec image ``` -- 2.49.0 From 59bf51dfdea86173221ab75265509f606feb4a39 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 May 2023 13:50:34 +0200 Subject: [PATCH 066/472] Add ix to common packages --- common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index c7fc6ce..c991e63 100644 --- a/common/main.nix +++ b/common/main.nix @@ -13,7 +13,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors + nix-diff ipmitool freeipmi ethtool lm_sensors ix ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From 1bf6747b3a0a322b3b251804de75bf22760be4fd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 May 2023 18:28:49 +0200 Subject: [PATCH 067/472] Add cmake to system packages --- common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index c991e63..22d4d6b 100644 --- a/common/main.nix +++ b/common/main.nix @@ -13,7 +13,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors ix + nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From 470b3d25125e6295f32a4cbc1db468e29be0c0da Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 May 2023 18:31:48 +0200 Subject: [PATCH 068/472] Add gnumake to system packages --- common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index 22d4d6b..78e13e0 100644 --- a/common/main.nix +++ b/common/main.nix @@ -13,7 +13,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool 
lm_sensors ix cmake + nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From 0af185afd8ac5ec2f9b2cf8c96dae093d86762ba Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 May 2023 18:56:01 +0200 Subject: [PATCH 069/472] Add file to system packages --- common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index 78e13e0..97c7673 100644 --- a/common/main.nix +++ b/common/main.nix @@ -13,7 +13,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake + nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From c7692995f4a64467ec83c8c0994612a8f069bee1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 31 May 2023 17:06:09 +0200 Subject: [PATCH 070/472] Add tree command --- common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/main.nix b/common/main.nix index 97c7673..d993956 100644 --- a/common/main.nix +++ b/common/main.nix @@ -13,7 +13,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file + nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From 1c7de2f7c9399ac73eccce5bbd458a78e0a3b94a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 31 May 2023 17:23:08 +0200 Subject: [PATCH 071/472] Serve grafana in https://jungle.bsc.es/grafana --- xeon07/monitoring.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xeon07/monitoring.nix b/xeon07/monitoring.nix index d6438d7..c55d366 100644 --- a/xeon07/monitoring.nix +++ b/xeon07/monitoring.nix @@ -5,6 +5,9 @@ enable = true; 
settings = { server = { + domain = "jungle.bsc.es"; + root_url = "%(protocol)s://%(domain)s/grafana"; + serve_from_sub_path = true; http_port = 2342; http_addr = "127.0.0.1"; }; -- 2.49.0 From 5421eab09aac69c36951a5aa5cbfa2f4c881c27a Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 7 Jun 2023 19:52:24 +0200 Subject: [PATCH 072/472] xeon08: Add lttng module and tools --- xeon08/kernel/kernel.nix | 3 +++ xeon08/kernel/lttng.nix | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 xeon08/kernel/lttng.nix diff --git a/xeon08/kernel/kernel.nix b/xeon08/kernel/kernel.nix index 5aca93c..ae3b91d 100644 --- a/xeon08/kernel/kernel.nix +++ b/xeon08/kernel/kernel.nix @@ -46,5 +46,8 @@ let latest = pkgs.linuxPackages_latest; in { + imports = [ + ./lttng.nix + ]; boot.kernelPackages = lib.mkForce kernel; } diff --git a/xeon08/kernel/lttng.nix b/xeon08/kernel/lttng.nix new file mode 100644 index 0000000..b9d6e4d --- /dev/null +++ b/xeon08/kernel/lttng.nix @@ -0,0 +1,36 @@ +{ config, pkgs, lib, ... }: + +let + + # the lttng btrfs probe crashes at compile time because of an undefined + # function. This disables the btrfs tracepoints to avoid the issue. + lttng-modules-fixed = config.boot.kernelPackages.lttng-modules.overrideAttrs (finalAttrs: previousAttrs: { + patchPhase = (lib.optionalString (previousAttrs ? 
patchPhase) previousAttrs.patchPhase) + '' + substituteInPlace src/probes/Kbuild \ + --replace " obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" " #obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" + ''; + }); +in { + + # add the lttng tools and modules to the system environment + boot.extraModulePackages = [ lttng-modules-fixed ]; + environment.systemPackages = with pkgs; [ + lttng-tools lttng-ust babeltrace + ]; + + # start the lttng root daemon to manage kernel events + systemd.services.lttng-sessiond = { + wantedBy = [ "multi-user.target" ]; + description = "LTTng session daemon for the root user"; + serviceConfig = { + User = "root"; + ExecStart = '' + ${pkgs.lttng-tools}/bin/lttng-sessiond + ''; + }; + }; + + # members of the tracing group can use the lttng-provided kernel events + # without root permissions + users.groups.tracing.members = [ "arocanon" ]; +} -- 2.49.0 From d35becb6633e8e6459e86ba6ce9cd43dadcf50a7 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 9 Jun 2023 08:04:30 +0200 Subject: [PATCH 073/472] xeon08: Enable lttng lockdep tracepoints --- xeon08/configuration.nix | 4 ++++ xeon08/kernel/lttng.nix | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/xeon08/configuration.nix b/xeon08/configuration.nix index 7899173..195117f 100644 --- a/xeon08/configuration.nix +++ b/xeon08/configuration.nix @@ -18,6 +18,10 @@ # disable automatic garbage collector nix.gc.automatic = lib.mkForce false; + # members of the tracing group can use the lttng-provided kernel events + # without root permissions + users.groups.tracing.members = [ "arocanon" ]; + # set up both ethernet and infiniband ips networking = { hostName = "xeon08"; diff --git a/xeon08/kernel/lttng.nix b/xeon08/kernel/lttng.nix index b9d6e4d..eb45911 100644 --- a/xeon08/kernel/lttng.nix +++ b/xeon08/kernel/lttng.nix @@ -2,12 +2,23 @@ let - # the lttng btrfs probe crashes at compile time because of an undefined + # The lttng btrfs probe crashes at compile time 
because of an undefined # function. This disables the btrfs tracepoints to avoid the issue. + + # Also enable lockdep tracepoints, this is disabled by default because it + # does not work well on architectures other than x86_64 (i think that arm) as + # I was told on the mailing list. lttng-modules-fixed = config.boot.kernelPackages.lttng-modules.overrideAttrs (finalAttrs: previousAttrs: { patchPhase = (lib.optionalString (previousAttrs ? patchPhase) previousAttrs.patchPhase) + '' + # disable btrfs substituteInPlace src/probes/Kbuild \ --replace " obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" " #obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" + + # enable lockdep tracepoints + substituteInPlace src/probes/Kbuild \ + --replace "#ifneq (\$(CONFIG_LOCKDEP),)" "ifneq (\$(CONFIG_LOCKDEP),)" \ + --replace "# obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" " obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" \ + --replace "#endif # CONFIG_LOCKDEP" "endif # CONFIG_LOCKDEP" ''; }); in { @@ -29,8 +40,4 @@ in { ''; }; }; - - # members of the tracing group can use the lttng-provided kernel events - # without root permissions - users.groups.tracing.members = [ "arocanon" ]; } -- 2.49.0 From 0d196af473c2ba92a7f24933774e8a8e3d354388 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 9 Jun 2023 10:58:11 +0200 Subject: [PATCH 074/472] xeon08: Add perf --- xeon08/kernel/kernel.nix | 1 + xeon08/kernel/perf.nix | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 xeon08/kernel/perf.nix diff --git a/xeon08/kernel/kernel.nix b/xeon08/kernel/kernel.nix index ae3b91d..45ecce6 100644 --- a/xeon08/kernel/kernel.nix +++ b/xeon08/kernel/kernel.nix @@ -48,6 +48,7 @@ let in { imports = [ ./lttng.nix + ./perf.nix ]; boot.kernelPackages = lib.mkForce kernel; } diff --git a/xeon08/kernel/perf.nix b/xeon08/kernel/perf.nix new file mode 100644 index 0000000..51340df --- /dev/null +++ b/xeon08/kernel/perf.nix @@ -0,0 +1,22 @@ +{ config, pkgs, lib, ... 
}: + +{ + # add the perf tool + environment.systemPackages = with pkgs; [ + config.boot.kernelPackages.perf + ]; + + # allow non-root users to read tracing data from the kernel + boot.kernel.sysctl."kernel.perf_event_paranoid" = -2; + boot.kernel.sysctl."kernel.kptr_restrict" = 0; + + # specify additional options to the tracefs directory to allow members of the + # tracing group to access tracefs. + fileSystems."/sys/kernel/tracing" = { + options = [ + "mode=755" + "gid=tracing" + ]; + }; +} + -- 2.49.0 From 6558a6ab7711bbcc74836b572c27e28d36377f66 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Mon, 12 Jun 2023 17:16:01 +0200 Subject: [PATCH 075/472] xeon08: Add config for kernel non-voluntary preemption --- xeon08/kernel/kernel.nix | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/xeon08/kernel/kernel.nix b/xeon08/kernel/kernel.nix index 45ecce6..10bfead 100644 --- a/xeon08/kernel/kernel.nix +++ b/xeon08/kernel/kernel.nix @@ -23,7 +23,7 @@ let kernel = nixos-fcsv2; - nixos-fcs-kernel = {gitCommit, lockStat ? false}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? 
false}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { version = "6.2.8"; src = builtins.fetchGit { url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; @@ -31,9 +31,12 @@ let ref = "fcs"; }; structuredExtraConfig = with lib.kernel; { - # add cutom kernel options here + # add general custom kernel options here } // lib.optionalAttrs lockStat { LOCK_STAT = yes; + } // lib.optionalAttrs preempt { + PREEMPT = lib.mkForce yes; + PREEMPT_VOLUNTARY = lib.mkForce no; }; kernelPatches = []; extraMeta.branch = lib.versions.majorMinor version; @@ -41,8 +44,19 @@ let nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; - nixos-fcsv1-lockstat = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; lockStat = true;}; - nixos-fcsv2-lockstat = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; lockStat = true;}; + nixos-fcsv1-lockstat = nixos-fcs-kernel { + gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; + lockStat = true; + }; + nixos-fcsv2-lockstat = nixos-fcs-kernel { + gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; + lockStat = true; + }; + nixos-fcsv2-lockstat-preempt = nixos-fcs-kernel { + gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; + lockStat = true; + preempt = true; + }; latest = pkgs.linuxPackages_latest; in { -- 2.49.0 From 0b57bbc6e3af95dcc5dd4849dcea33f79bb317bd Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 14 Jun 2023 16:16:46 +0200 Subject: [PATCH 076/472] Move arocanon user from xeon08 to common --- common/users.nix | 28 ++++++++++++++++++++-------- xeon08/users.nix | 12 ------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/common/users.nix b/common/users.nix index 03a5eb1..be34db7 100644 --- a/common/users.nix +++ b/common/users.nix @@ -3,14 +3,26 @@ { users = { mutableUsers = false; - users.rarias = { - uid = 1880; - isNormalUser = 
true; - home = "/home/Computational/rarias"; - description = "Rodrigo Arias"; - group = "Computational"; - extraGroups = [ "wheel" ]; - hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; + users = { + rarias = { + uid = 1880; + isNormalUser = true; + home = "/home/Computational/rarias"; + description = "Rodrigo Arias"; + group = "Computational"; + extraGroups = [ "wheel" ]; + hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; + }; + + arocanon = { + uid = 1042; + isNormalUser = true; + home = "/home/Computational/arocanon"; + description = "Aleix Roca"; + group = "Computational"; + extraGroups = [ "wheel" ]; + hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; + }; }; groups = { diff --git a/xeon08/users.nix b/xeon08/users.nix index 4a01344..a1cfab4 100644 --- a/xeon08/users.nix +++ b/xeon08/users.nix @@ -1,18 +1,6 @@ { ... 
}: { - users = { - users.arocanon = { - uid = 1042; - isNormalUser = true; - home = "/home/Computational/arocanon"; - description = "Aleix Roca"; - group = "Computational"; - extraGroups = [ "wheel" ]; - hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; - }; - }; - security.sudo.extraRules= [{ users = [ "arocanon" ]; commands = [{ -- 2.49.0 From 08eaf312f2739e82738a5ea3078f74138b41f9b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 14 Jun 2023 12:05:15 +0200 Subject: [PATCH 077/472] Add ncdu to system packages --- common/main.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/common/main.nix b/common/main.nix index d993956..fd5296e 100644 --- a/common/main.nix +++ b/common/main.nix @@ -14,6 +14,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree + ncdu ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From a9d740e95a3bfd48c85ba7a401b80b918f8187d4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 14 Jun 2023 13:55:19 +0200 Subject: [PATCH 078/472] Remove profiles older than 30 days with gc --- common/main.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/common/main.nix b/common/main.nix index fd5296e..59d5286 100644 --- a/common/main.nix +++ b/common/main.nix @@ -46,6 +46,7 @@ nix.settings.trusted-users = [ "@wheel" ]; nix.gc.automatic = true; nix.gc.dates = "weekly"; + nix.gc.options = "--delete-older-than 30d"; programs.zsh.enable = true; programs.zsh.histSize = 100000; -- 2.49.0 From 801bb4ba3ce4b1b9cf62b56288a9c55c3cc79850 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 14 Jun 2023 11:15:00 +0200 Subject: [PATCH 079/472] Rename xeon07 to hut --- common/net.nix | 2 +- common/slurm.nix | 2 +- doc/install.md | 10 +++++----- flake.nix | 6 +++--- {xeon07 => hut}/configuration.nix | 2 +- {xeon07 => 
hut}/gitlab-runner.nix | 0 {xeon07 => hut}/monitoring.nix | 0 {xeon07 => hut}/nfs.nix | 0 {xeon07 => hut}/secrets.nix | 0 {xeon07 => hut}/secrets/nosv-token.age | Bin {xeon07 => hut}/secrets/ovni-token.age | 0 {xeon07 => hut}/slurm-daemon.nix | 0 12 files changed, 11 insertions(+), 11 deletions(-) rename {xeon07 => hut}/configuration.nix (95%) rename {xeon07 => hut}/gitlab-runner.nix (100%) rename {xeon07 => hut}/monitoring.nix (100%) rename {xeon07 => hut}/nfs.nix (100%) rename {xeon07 => hut}/secrets.nix (100%) rename {xeon07 => hut}/secrets/nosv-token.age (100%) rename {xeon07 => hut}/secrets/ovni-token.age (100%) rename {xeon07 => hut}/slurm-daemon.nix (100%) diff --git a/common/net.nix b/common/net.nix index 6e5d5bf..51f80f4 100644 --- a/common/net.nix +++ b/common/net.nix @@ -73,7 +73,7 @@ 10.0.40.106 xeon06-ipmi0 # Node Entry for node: xeon07 (ID=21) - 10.0.40.7 xeon07 xeon07-eth0 + 10.0.40.7 xeon07 xeon07-eth0 hut 10.0.42.7 xeon07-ib0 10.0.40.107 xeon07-ipmi0 diff --git a/common/slurm.nix b/common/slurm.nix index e4890e3..2da5a02 100644 --- a/common/slurm.nix +++ b/common/slurm.nix @@ -3,7 +3,7 @@ { services.slurm = { client.enable = true; - controlMachine = "xeon07"; + controlMachine = "hut"; clusterName = "owl"; nodeName = [ "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" diff --git a/doc/install.md b/doc/install.md index 41e7684..8eae8c7 100644 --- a/doc/install.md +++ b/doc/install.md @@ -32,19 +32,19 @@ sdX ## Prepare nix and nixos-install -Mount the nix store from the xeon07 node in read-only /nix. +Mount the nix store from the hut node in read-only /nix. 
``` # mkdir /nix -# mount -o ro xeon07:/nix /nix +# mount -o ro hut:/nix /nix ``` -Get the nix binary and nixos-install tool from xeon07: +Get the nix binary and nixos-install tool from hut: ``` -# ssh xeon07 'readlink -f $(which nix)' +# ssh hut 'readlink -f $(which nix)' /nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin/nix -# ssh xeon07 'readlink -f $(which nixos-install)' +# ssh hut 'readlink -f $(which nixos-install)' /nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/nixos-install ``` diff --git a/flake.nix b/flake.nix index e07bff9..d3fc4ae 100644 --- a/flake.nix +++ b/flake.nix @@ -52,7 +52,7 @@ ./xeon02/configuration.nix ]; }; - xeon07 = nixpkgs.lib.nixosSystem { + hut = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; modules = [ ( {options, ...}: { @@ -61,7 +61,7 @@ nix.nixPath = [ "nixpkgs=${nixpkgs}" "bscpkgs=${bscpkgs}" - "nixos-config=${self.outPath}/xeon07/configuration.nix" + "nixos-config=${self.outPath}/hut/configuration.nix" "nixpkgs-overlays=${self.outPath}/overlays-compat" ]; nix.registry.nixpkgs.flake = nixpkgs; @@ -72,7 +72,7 @@ else throw ("Refusing to build from a dirty Git tree!"); }) agenix.nixosModules.default - ./xeon07/configuration.nix + ./hut/configuration.nix ]; }; xeon08 = nixpkgs.lib.nixosSystem { diff --git a/xeon07/configuration.nix b/hut/configuration.nix similarity index 95% rename from xeon07/configuration.nix rename to hut/configuration.nix index f3b6ab4..423b18d 100644 --- a/xeon07/configuration.nix +++ b/hut/configuration.nix @@ -14,7 +14,7 @@ boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; networking = { - hostName = "xeon07"; + hostName = "hut"; interfaces.eno1.ipv4.addresses = [ { address = "10.0.40.7"; prefixLength = 24; diff --git a/xeon07/gitlab-runner.nix b/hut/gitlab-runner.nix similarity index 100% rename from xeon07/gitlab-runner.nix rename to hut/gitlab-runner.nix diff --git a/xeon07/monitoring.nix b/hut/monitoring.nix similarity index 100% rename 
from xeon07/monitoring.nix rename to hut/monitoring.nix diff --git a/xeon07/nfs.nix b/hut/nfs.nix similarity index 100% rename from xeon07/nfs.nix rename to hut/nfs.nix diff --git a/xeon07/secrets.nix b/hut/secrets.nix similarity index 100% rename from xeon07/secrets.nix rename to hut/secrets.nix diff --git a/xeon07/secrets/nosv-token.age b/hut/secrets/nosv-token.age similarity index 100% rename from xeon07/secrets/nosv-token.age rename to hut/secrets/nosv-token.age diff --git a/xeon07/secrets/ovni-token.age b/hut/secrets/ovni-token.age similarity index 100% rename from xeon07/secrets/ovni-token.age rename to hut/secrets/ovni-token.age diff --git a/xeon07/slurm-daemon.nix b/hut/slurm-daemon.nix similarity index 100% rename from xeon07/slurm-daemon.nix rename to hut/slurm-daemon.nix -- 2.49.0 From a43016ebeed7fd604463eb6cf2feed7a644611d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 14 Jun 2023 17:28:00 +0200 Subject: [PATCH 080/472] Simplify flake and expose host pkgs The configuration of the machines is now moved to m/ --- common/overlays.nix | 9 --- flake.nix | 105 ++++--------------------- {common => m/common}/boot.nix | 0 {common => m/common}/fs.nix | 0 {common => m/common}/hw.nix | 0 {common => m/common}/main.nix | 19 ++++- {common => m/common}/net.nix | 4 +- {common => m/common}/slurm.nix | 3 +- {common => m/common}/ssh.nix | 0 {common => m/common}/users.nix | 0 {hut => m/hut}/configuration.nix | 3 +- {hut => m/hut}/gitlab-runner.nix | 0 {hut => m/hut}/monitoring.nix | 0 {hut => m/hut}/nfs.nix | 0 {hut => m/hut}/secrets.nix | 0 {hut => m/hut}/secrets/nosv-token.age | Bin {hut => m/hut}/secrets/ovni-token.age | 0 {hut => m/hut}/slurm-daemon.nix | 0 {xeon01 => m/owl1}/configuration.nix | 0 {xeon02 => m/owl2}/configuration.nix | 0 {xeon08 => m/xeon08}/configuration.nix | 0 {xeon08 => m/xeon08}/cpufreq.nix | 0 {xeon08 => m/xeon08}/fs.nix | 0 {xeon08 => m/xeon08}/kernel/kernel.nix | 0 {xeon08 => m/xeon08}/kernel/lttng.nix | 0 {xeon08 => 
m/xeon08}/kernel/perf.nix | 0 {xeon08 => m/xeon08}/slurm.nix | 0 {xeon08 => m/xeon08}/users.nix | 0 overlays-compat/overlays.nix | 8 -- 29 files changed, 39 insertions(+), 112 deletions(-) delete mode 100644 common/overlays.nix rename {common => m/common}/boot.nix (100%) rename {common => m/common}/fs.nix (100%) rename {common => m/common}/hw.nix (100%) rename {common => m/common}/main.nix (80%) rename {common => m/common}/net.nix (96%) rename {common => m/common}/slurm.nix (61%) rename {common => m/common}/ssh.nix (100%) rename {common => m/common}/users.nix (100%) rename {hut => m/hut}/configuration.nix (89%) rename {hut => m/hut}/gitlab-runner.nix (100%) rename {hut => m/hut}/monitoring.nix (100%) rename {hut => m/hut}/nfs.nix (100%) rename {hut => m/hut}/secrets.nix (100%) rename {hut => m/hut}/secrets/nosv-token.age (100%) rename {hut => m/hut}/secrets/ovni-token.age (100%) rename {hut => m/hut}/slurm-daemon.nix (100%) rename {xeon01 => m/owl1}/configuration.nix (100%) rename {xeon02 => m/owl2}/configuration.nix (100%) rename {xeon08 => m/xeon08}/configuration.nix (100%) rename {xeon08 => m/xeon08}/cpufreq.nix (100%) rename {xeon08 => m/xeon08}/fs.nix (100%) rename {xeon08 => m/xeon08}/kernel/kernel.nix (100%) rename {xeon08 => m/xeon08}/kernel/lttng.nix (100%) rename {xeon08 => m/xeon08}/kernel/perf.nix (100%) rename {xeon08 => m/xeon08}/slurm.nix (100%) rename {xeon08 => m/xeon08}/users.nix (100%) delete mode 100644 overlays-compat/overlays.nix diff --git a/common/overlays.nix b/common/overlays.nix deleted file mode 100644 index fdf0706..0000000 --- a/common/overlays.nix +++ /dev/null @@ -1,9 +0,0 @@ -{ options, ... }: - -{ - nix.nixPath = - # Prepend default nixPath values. - options.nix.nixPath.default ++ - # Append our nixpkgs-overlays. 
- [ "nixpkgs-overlays=${../overlays-compat}" ]; -} diff --git a/flake.nix b/flake.nix index d3fc4ae..54642d0 100644 --- a/flake.nix +++ b/flake.nix @@ -6,97 +6,22 @@ bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; }; - outputs = { self, nixpkgs, agenix, bscpkgs, ... }: { + outputs = { self, nixpkgs, agenix, bscpkgs, ... }: +let + mkConf = name: nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + specialArgs = { inherit nixpkgs bscpkgs agenix; theFlake = self; }; + modules = [ "${self.outPath}/m/${name}/configuration.nix" ]; + }; +in + { nixosConfigurations = { - xeon01 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - ( {options, ...}: { - # Sel the nixos-config path to the one of the current flake - nixpkgs.overlays = [ bscpkgs.bscOverlay ]; - nix.nixPath = [ - "nixpkgs=${nixpkgs}" - "bscpkgs=${bscpkgs}" - "nixos-config=${self.outPath}/xeon01/configuration.nix" - "nixpkgs-overlays=${self.outPath}/overlays-compat" - ]; - nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.bscpkgs.flake = bscpkgs; - system.configurationRevision = - if self ? rev - then self.rev - else throw ("Refusing to build from a dirty Git tree!"); - }) - ./xeon01/configuration.nix - ]; - }; - xeon02 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - ( {options, ...}: { - # Sel the nixos-config path to the one of the current flake - nixpkgs.overlays = [ bscpkgs.bscOverlay ]; - nix.nixPath = [ - "nixpkgs=${nixpkgs}" - "bscpkgs=${bscpkgs}" - "nixos-config=${self.outPath}/xeon02/configuration.nix" - "nixpkgs-overlays=${self.outPath}/overlays-compat" - ]; - nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.bscpkgs.flake = bscpkgs; - system.configurationRevision = - if self ? 
rev - then self.rev - else throw ("Refusing to build from a dirty Git tree!"); - }) - ./xeon02/configuration.nix - ]; - }; - hut = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - ( {options, ...}: { - # Sel the nixos-config path to the one of the current flake - nixpkgs.overlays = [ bscpkgs.bscOverlay ]; - nix.nixPath = [ - "nixpkgs=${nixpkgs}" - "bscpkgs=${bscpkgs}" - "nixos-config=${self.outPath}/hut/configuration.nix" - "nixpkgs-overlays=${self.outPath}/overlays-compat" - ]; - nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.bscpkgs.flake = bscpkgs; - system.configurationRevision = - if self ? rev - then self.rev - else throw ("Refusing to build from a dirty Git tree!"); - }) - agenix.nixosModules.default - ./hut/configuration.nix - ]; - }; - xeon08 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - ( {options, ...}: { - # Sel the nixos-config path to the one of the current flake - nixpkgs.overlays = [ bscpkgs.bscOverlay ]; - nix.nixPath = [ - "nixpkgs=${nixpkgs}" - "bscpkgs=${bscpkgs}" - "nixos-config=${self.outPath}/xeon08/configuration.nix" - "nixpkgs-overlays=${self.outPath}/overlays-compat" - ]; - nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.bscpkgs.flake = bscpkgs; - system.configurationRevision = - if self ? 
rev - then self.rev - else throw ("Refusing to build from a dirty Git tree!"); - }) - ./xeon08/configuration.nix - ]; - }; + hut = mkConf "hut"; + owl1 = mkConf "owl1"; + owl2 = mkConf "owl2"; + xeon08 = mkConf "xeon08"; }; + + packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; }; } diff --git a/common/boot.nix b/m/common/boot.nix similarity index 100% rename from common/boot.nix rename to m/common/boot.nix diff --git a/common/fs.nix b/m/common/fs.nix similarity index 100% rename from common/fs.nix rename to m/common/fs.nix diff --git a/common/hw.nix b/m/common/hw.nix similarity index 100% rename from common/hw.nix rename to m/common/hw.nix diff --git a/common/main.nix b/m/common/main.nix similarity index 80% rename from common/main.nix rename to m/common/main.nix index 59d5286..ba50376 100644 --- a/common/main.nix +++ b/m/common/main.nix @@ -1,4 +1,4 @@ -{ config, pkgs, ... }: +{ config, pkgs, nixpkgs, bscpkgs, agenix, theFlake, ... }: { imports = [ @@ -11,6 +11,23 @@ ./users.nix ]; + nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + + nix.nixPath = [ + "nixpkgs=${nixpkgs}" + "bscpkgs=${bscpkgs}" + "jungle=${theFlake.outPath}" + ]; + + nix.registry.nixpkgs.flake = nixpkgs; + nix.registry.bscpkgs.flake = bscpkgs; + nix.registry.jungle.flake = theFlake; + + system.configurationRevision = + if theFlake ? 
rev + then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); + environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree diff --git a/common/net.nix b/m/common/net.nix similarity index 96% rename from common/net.nix rename to m/common/net.nix index 51f80f4..504246c 100644 --- a/common/net.nix +++ b/m/common/net.nix @@ -43,12 +43,12 @@ 10.0.40.143 oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) - 10.0.40.1 xeon01 xeon01-eth0 + 10.0.40.1 xeon01 xeon01-eth0 owl1 10.0.42.1 xeon01-ib0 10.0.40.101 xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) - 10.0.40.2 xeon02 xeon02-eth0 + 10.0.40.2 xeon02 xeon02-eth0 owl2 10.0.42.2 xeon02-ib0 10.0.40.102 xeon02-ipmi0 diff --git a/common/slurm.nix b/m/common/slurm.nix similarity index 61% rename from common/slurm.nix rename to m/common/slurm.nix index 2da5a02..c208b88 100644 --- a/common/slurm.nix +++ b/m/common/slurm.nix @@ -6,7 +6,8 @@ controlMachine = "hut"; clusterName = "owl"; nodeName = [ - "xeon[01-02,07] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=xeon" + "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" + "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; extraConfig = '' MpiDefault=pmix diff --git a/common/ssh.nix b/m/common/ssh.nix similarity index 100% rename from common/ssh.nix rename to m/common/ssh.nix diff --git a/common/users.nix b/m/common/users.nix similarity index 100% rename from common/users.nix rename to m/common/users.nix diff --git a/hut/configuration.nix b/m/hut/configuration.nix similarity index 89% rename from hut/configuration.nix rename to m/hut/configuration.nix index 423b18d..82d4c34 100644 --- a/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -1,4 +1,4 @@ -{ config, pkgs, ... }: +{ config, pkgs, agenix, ... 
}: { imports = [ @@ -8,6 +8,7 @@ ./monitoring.nix ./nfs.nix ./slurm-daemon.nix + agenix.nixosModules.default ]; # Select the this using the ID to avoid mismatches diff --git a/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix similarity index 100% rename from hut/gitlab-runner.nix rename to m/hut/gitlab-runner.nix diff --git a/hut/monitoring.nix b/m/hut/monitoring.nix similarity index 100% rename from hut/monitoring.nix rename to m/hut/monitoring.nix diff --git a/hut/nfs.nix b/m/hut/nfs.nix similarity index 100% rename from hut/nfs.nix rename to m/hut/nfs.nix diff --git a/hut/secrets.nix b/m/hut/secrets.nix similarity index 100% rename from hut/secrets.nix rename to m/hut/secrets.nix diff --git a/hut/secrets/nosv-token.age b/m/hut/secrets/nosv-token.age similarity index 100% rename from hut/secrets/nosv-token.age rename to m/hut/secrets/nosv-token.age diff --git a/hut/secrets/ovni-token.age b/m/hut/secrets/ovni-token.age similarity index 100% rename from hut/secrets/ovni-token.age rename to m/hut/secrets/ovni-token.age diff --git a/hut/slurm-daemon.nix b/m/hut/slurm-daemon.nix similarity index 100% rename from hut/slurm-daemon.nix rename to m/hut/slurm-daemon.nix diff --git a/xeon01/configuration.nix b/m/owl1/configuration.nix similarity index 100% rename from xeon01/configuration.nix rename to m/owl1/configuration.nix diff --git a/xeon02/configuration.nix b/m/owl2/configuration.nix similarity index 100% rename from xeon02/configuration.nix rename to m/owl2/configuration.nix diff --git a/xeon08/configuration.nix b/m/xeon08/configuration.nix similarity index 100% rename from xeon08/configuration.nix rename to m/xeon08/configuration.nix diff --git a/xeon08/cpufreq.nix b/m/xeon08/cpufreq.nix similarity index 100% rename from xeon08/cpufreq.nix rename to m/xeon08/cpufreq.nix diff --git a/xeon08/fs.nix b/m/xeon08/fs.nix similarity index 100% rename from xeon08/fs.nix rename to m/xeon08/fs.nix diff --git a/xeon08/kernel/kernel.nix b/m/xeon08/kernel/kernel.nix similarity 
index 100% rename from xeon08/kernel/kernel.nix rename to m/xeon08/kernel/kernel.nix diff --git a/xeon08/kernel/lttng.nix b/m/xeon08/kernel/lttng.nix similarity index 100% rename from xeon08/kernel/lttng.nix rename to m/xeon08/kernel/lttng.nix diff --git a/xeon08/kernel/perf.nix b/m/xeon08/kernel/perf.nix similarity index 100% rename from xeon08/kernel/perf.nix rename to m/xeon08/kernel/perf.nix diff --git a/xeon08/slurm.nix b/m/xeon08/slurm.nix similarity index 100% rename from xeon08/slurm.nix rename to m/xeon08/slurm.nix diff --git a/xeon08/users.nix b/m/xeon08/users.nix similarity index 100% rename from xeon08/users.nix rename to m/xeon08/users.nix diff --git a/overlays-compat/overlays.nix b/overlays-compat/overlays.nix deleted file mode 100644 index 8a606c5..0000000 --- a/overlays-compat/overlays.nix +++ /dev/null @@ -1,8 +0,0 @@ -self: super: -with super.lib; -let - # Load the system config and get the `nixpkgs.overlays` option - overlays = (import { }).config.nixpkgs.overlays; -in - # Apply all overlays to the input of the current "main" overlay - foldl' (flip extends) (_: super) overlays self -- 2.49.0 From 30c21155af5303ca0451f8774a6d31623d9a1b28 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 11:34:00 +0200 Subject: [PATCH 081/472] Add owl and all partition --- m/hut/slurm-daemon.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/m/hut/slurm-daemon.nix b/m/hut/slurm-daemon.nix index a8dd3b8..e6ab227 100644 --- a/m/hut/slurm-daemon.nix +++ b/m/hut/slurm-daemon.nix @@ -4,7 +4,8 @@ services.slurm = { server.enable = true; partitionName = [ - "xeon Nodes=xeon[01-02,07] Default=YES MaxTime=INFINITE State=UP" + "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" + "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" ]; }; } -- 2.49.0 From df91da8c347344ce31eae6fb99bc2f213d29acd3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 11:42:39 +0200 Subject: [PATCH 082/472] Change 
owl hostnames --- m/owl1/configuration.nix | 2 +- m/owl2/configuration.nix | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 1daecec..a14ab21 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -7,7 +7,7 @@ boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; networking = { - hostName = "xeon01"; + hostName = "owl1"; interfaces.eno1.ipv4.addresses = [ { address = "10.0.40.1"; prefixLength = 24; diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 1b06afd..8022b36 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -12,7 +12,7 @@ #programs.ssh.setXAuthLocation = lib.mkForce true; networking = { - hostName = "xeon02"; + hostName = "owl2"; interfaces.eno1.ipv4.addresses = [ { address = "10.0.40.2"; prefixLength = 24; -- 2.49.0 From dfea0be2d9378aeca35f1692599139789e4ad8f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 12:00:54 +0200 Subject: [PATCH 083/472] Set the name of the slurm cluster to jungle --- m/common/slurm.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index c208b88..650156f 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -4,7 +4,7 @@ services.slurm = { client.enable = true; controlMachine = "hut"; - clusterName = "owl"; + clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" -- 2.49.0 From 7d4281a5c10bf1846737d040ef8b5e96a420e715 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 12:01:12 +0200 Subject: [PATCH 084/472] Add ssh host keys --- m/common/ssh.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/common/ssh.nix b/m/common/ssh.nix index d3676f7..08c958c 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -17,6 +17,7 @@ "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut" ]; rarias.openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" @@ -30,6 +31,10 @@ }; programs.ssh.knownHosts = { + "hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; + "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; + "owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK"; + "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; }; -- 2.49.0 From dfbeafa2b2063f9ef450d9d9946440014b97f78c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 12:13:07 +0200 Subject: [PATCH 085/472] Update rebuild script for all nodes --- rebuild.sh | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/rebuild.sh b/rebuild.sh index 5df9616..1320c24 100755 --- a/rebuild.sh +++ b/rebuild.sh @@ -1,19 +1,16 @@ -#!/bin/sh -e +#!/bin/sh -ex if [ "$(id -u)" != 0 ]; then echo "Needs root permissions" exit 1 fi -host=$(hostname) - -#conf="$(readlink -f .)/${host}/configuration.nix" -# -#if [ ! -e "$conf" ]; then -# echo "Missing config $conf" -# exit 1 -#fi -# -#NIXOS_CONFIG="${conf}" nixos-rebuild switch +if [ "$(hostname)" != "hut" ]; then + >&2 echo "must run from machine hut, not $(hostname)" + exit 1 +fi +# Update all nodes nixos-rebuild switch --flake . 
+nixos-rebuild switch --flake .#owl1 --target-host owl1 +nixos-rebuild switch --flake .#owl2 --target-host owl2 -- 2.49.0 From 2a0fe5a137edcc658b233b948ef1e1bae3ce602b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 17:16:05 +0200 Subject: [PATCH 086/472] Rename xeon08 to eudy From Eudyptula, a little penguin. --- flake.nix | 8 ++++---- m/common/net.nix | 2 +- m/{xeon08 => eudy}/configuration.nix | 2 +- m/{xeon08 => eudy}/cpufreq.nix | 0 m/{xeon08 => eudy}/fs.nix | 0 m/{xeon08 => eudy}/kernel/kernel.nix | 0 m/{xeon08 => eudy}/kernel/lttng.nix | 0 m/{xeon08 => eudy}/kernel/perf.nix | 0 m/{xeon08 => eudy}/slurm.nix | 0 m/{xeon08 => eudy}/users.nix | 0 10 files changed, 6 insertions(+), 6 deletions(-) rename m/{xeon08 => eudy}/configuration.nix (97%) rename m/{xeon08 => eudy}/cpufreq.nix (100%) rename m/{xeon08 => eudy}/fs.nix (100%) rename m/{xeon08 => eudy}/kernel/kernel.nix (100%) rename m/{xeon08 => eudy}/kernel/lttng.nix (100%) rename m/{xeon08 => eudy}/kernel/perf.nix (100%) rename m/{xeon08 => eudy}/slurm.nix (100%) rename m/{xeon08 => eudy}/users.nix (100%) diff --git a/flake.nix b/flake.nix index 54642d0..ebc935b 100644 --- a/flake.nix +++ b/flake.nix @@ -16,10 +16,10 @@ let in { nixosConfigurations = { - hut = mkConf "hut"; - owl1 = mkConf "owl1"; - owl2 = mkConf "owl2"; - xeon08 = mkConf "xeon08"; + hut = mkConf "hut"; + owl1 = mkConf "owl1"; + owl2 = mkConf "owl2"; + eudy = mkConf "eudy"; }; packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; diff --git a/m/common/net.nix b/m/common/net.nix index 504246c..cfcd686 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -78,7 +78,7 @@ 10.0.40.107 xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) - 10.0.40.8 xeon08 xeon08-eth0 + 10.0.40.8 xeon08 xeon08-eth0 eudy 10.0.42.8 xeon08-ib0 10.0.40.108 xeon08-ipmi0 ''; diff --git a/m/xeon08/configuration.nix b/m/eudy/configuration.nix similarity index 97% rename from m/xeon08/configuration.nix rename to m/eudy/configuration.nix 
index 195117f..c627bf8 100644 --- a/m/xeon08/configuration.nix +++ b/m/eudy/configuration.nix @@ -24,7 +24,7 @@ # set up both ethernet and infiniband ips networking = { - hostName = "xeon08"; + hostName = "eudy"; interfaces.eno1.ipv4.addresses = [ { address = "10.0.40.8"; prefixLength = 24; diff --git a/m/xeon08/cpufreq.nix b/m/eudy/cpufreq.nix similarity index 100% rename from m/xeon08/cpufreq.nix rename to m/eudy/cpufreq.nix diff --git a/m/xeon08/fs.nix b/m/eudy/fs.nix similarity index 100% rename from m/xeon08/fs.nix rename to m/eudy/fs.nix diff --git a/m/xeon08/kernel/kernel.nix b/m/eudy/kernel/kernel.nix similarity index 100% rename from m/xeon08/kernel/kernel.nix rename to m/eudy/kernel/kernel.nix diff --git a/m/xeon08/kernel/lttng.nix b/m/eudy/kernel/lttng.nix similarity index 100% rename from m/xeon08/kernel/lttng.nix rename to m/eudy/kernel/lttng.nix diff --git a/m/xeon08/kernel/perf.nix b/m/eudy/kernel/perf.nix similarity index 100% rename from m/xeon08/kernel/perf.nix rename to m/eudy/kernel/perf.nix diff --git a/m/xeon08/slurm.nix b/m/eudy/slurm.nix similarity index 100% rename from m/xeon08/slurm.nix rename to m/eudy/slurm.nix diff --git a/m/xeon08/users.nix b/m/eudy/users.nix similarity index 100% rename from m/xeon08/users.nix rename to m/eudy/users.nix -- 2.49.0 From df378a293362a7e6695716294c871d5dda4a03e7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 17:29:48 +0200 Subject: [PATCH 087/472] Add eudy host key to known hosts --- m/common/ssh.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/common/ssh.nix b/m/common/ssh.nix index 08c958c..3e5bbd6 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -34,6 +34,7 @@ "hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; "owl2".publicKey = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK"; + "eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG"; "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; -- 2.49.0 From 530958496beeeb393d3fab4f1e09508988035c5f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 14:16:14 +0200 Subject: [PATCH 088/472] Add coments in slurm config --- m/common/slurm.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index 650156f..ce909df 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -10,7 +10,15 @@ "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; extraConfig = '' + # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but + # not with Intel MPI. For that use the compatibility shim libpmi.so + # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx + # library in SLURM (--mpi=pmix). See more details here: + # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16 MpiDefault=pmix + + # When a node reboots return that node to the slurm queue as soon as it + # becomes operative again. 
ReturnToService=2 ''; }; -- 2.49.0 From 0c4a1efa2737a9f7ae2bb8b58ed08f19d5d044f0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 14:16:51 +0200 Subject: [PATCH 089/472] Add mpich overlay --- m/common/main.nix | 2 ++ pkgs/mpi.nix | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 pkgs/mpi.nix diff --git a/m/common/main.nix b/m/common/main.nix index ba50376..a5bba65 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -9,6 +9,8 @@ ./slurm.nix ./ssh.nix ./users.nix + + ../../pkgs/mpi.nix ]; nixpkgs.overlays = [ bscpkgs.bscOverlay ]; diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix new file mode 100644 index 0000000..3b4141f --- /dev/null +++ b/pkgs/mpi.nix @@ -0,0 +1,28 @@ +let + overlay = final: prev: + { + mpich = with final; prev.mpich.overrideAttrs (old: { + buildInput = old.buildInputs ++ [ + libfabric + pmix + ]; + configureFlags = [ + "--enable-shared" + "--enable-sharedlib" + "--with-pm=no" + "--with-device=ch4:ofi" + "--with-pmi=pmix" + "--with-pmix=${final.pmix}" + "--with-libfabric=${final.libfabric}" + "--enable-g=log" + ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ + "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 + "FCFLAGS=-fallow-argument-mismatch" + ]; + }); + }; +in + +{ + nixpkgs.overlays = [ overlay ]; +} -- 2.49.0 From 1f7045fcfed9adb5cc86fa69151188f0ef7834a0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 15:54:55 +0200 Subject: [PATCH 090/472] Replace mpi inside bsc attribute --- pkgs/mpi.nix | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix index 3b4141f..b0e30d7 100644 --- a/pkgs/mpi.nix +++ b/pkgs/mpi.nix @@ -1,25 +1,28 @@ let overlay = final: prev: { - mpich = with final; prev.mpich.overrideAttrs (old: { - buildInput = old.buildInputs ++ [ - libfabric - pmix - ]; - configureFlags = [ - "--enable-shared" - 
"--enable-sharedlib" - "--with-pm=no" - "--with-device=ch4:ofi" - "--with-pmi=pmix" - "--with-pmix=${final.pmix}" - "--with-libfabric=${final.libfabric}" - "--enable-g=log" - ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ - "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 - "FCFLAGS=-fallow-argument-mismatch" - ]; - }); + bsc = prev.bsc.extend (bsc: { + mpi2 = bsc.mpich; + mpich = with final; prev.mpich.overrideAttrs (old: { + buildInput = old.buildInputs ++ [ + libfabric + pmix + ]; + configureFlags = [ + "--enable-shared" + "--enable-sharedlib" + "--with-pm=no" + "--with-device=ch4:ofi" + "--with-pmi=pmix" + "--with-pmix=${final.pmix}" + "--with-libfabric=${final.libfabric}" + "--enable-g=log" + ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ + "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 + "FCFLAGS=-fallow-argument-mismatch" + ]; + }); + }; }; in -- 2.49.0 From f2434a17c2ed742cf8774b158d1b585d6c812645 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 16:02:25 +0200 Subject: [PATCH 091/472] Use explicit order in overlays --- m/common/main.nix | 7 ++++--- pkgs/mpi.nix | 52 +++++++++++++++++++++-------------------------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/m/common/main.nix b/m/common/main.nix index a5bba65..ec3775e 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -9,11 +9,12 @@ ./slurm.nix ./ssh.nix ./users.nix - - ../../pkgs/mpi.nix ]; - nixpkgs.overlays = [ bscpkgs.bscOverlay ]; + nixpkgs.overlays = [ + bscpkgs.bscOverlay + (import ../../pkgs/mpi.nix) + ]; nix.nixPath = [ "nixpkgs=${nixpkgs}" diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix index b0e30d7..adfde9e 100644 --- a/pkgs/mpi.nix +++ b/pkgs/mpi.nix @@ -1,31 +1,25 @@ -let - overlay = final: prev: - { - bsc = prev.bsc.extend (bsc: { - mpi2 = bsc.mpich; - mpich = with final; prev.mpich.overrideAttrs (old: { - buildInput = old.buildInputs ++ [ - 
libfabric - pmix - ]; - configureFlags = [ - "--enable-shared" - "--enable-sharedlib" - "--with-pm=no" - "--with-device=ch4:ofi" - "--with-pmi=pmix" - "--with-pmix=${final.pmix}" - "--with-libfabric=${final.libfabric}" - "--enable-g=log" - ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ - "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 - "FCFLAGS=-fallow-argument-mismatch" - ]; - }); - }; - }; -in - +final: prev: { - nixpkgs.overlays = [ overlay ]; + bsc = prev.bsc.extend (bsc: { + mpi2 = bsc.mpich; + mpich = with final; prev.mpich.overrideAttrs (old: { + buildInput = old.buildInputs ++ [ + libfabric + pmix + ]; + configureFlags = [ + "--enable-shared" + "--enable-sharedlib" + "--with-pm=no" + "--with-device=ch4:ofi" + "--with-pmi=pmix" + "--with-pmix=${final.pmix}" + "--with-libfabric=${final.libfabric}" + "--enable-g=log" + ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ + "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 + "FCFLAGS=-fallow-argument-mismatch" + ]; + }); + }); } -- 2.49.0 From 2053ec82b7a561cedcfc744d3ce5b0ae909bc89c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 16:04:36 +0200 Subject: [PATCH 092/472] Add missing parameter to extend --- pkgs/mpi.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix index adfde9e..58c9a17 100644 --- a/pkgs/mpi.nix +++ b/pkgs/mpi.nix @@ -1,7 +1,7 @@ final: prev: { - bsc = prev.bsc.extend (bsc: { - mpi2 = bsc.mpich; + bsc = prev.bsc.extend (bscFinal: bscPrev: { + mpi2 = bscFinal.mpich; mpich = with final; prev.mpich.overrideAttrs (old: { buildInput = old.buildInputs ++ [ libfabric -- 2.49.0 From f7d00dec259274d9e1c78b1adb10cfd5a79eb1a1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 16:05:17 +0200 Subject: [PATCH 093/472] Set mpi to mpich by default in bscpkgs --- pkgs/mpi.nix | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix index 58c9a17..cdaeadd 100644 --- a/pkgs/mpi.nix +++ b/pkgs/mpi.nix @@ -1,7 +1,7 @@ final: prev: { bsc = prev.bsc.extend (bscFinal: bscPrev: { - mpi2 = bscFinal.mpich; + mpi = bscFinal.mpich; mpich = with final; prev.mpich.overrideAttrs (old: { buildInput = old.buildInputs ++ [ libfabric -- 2.49.0 From db26b2ae37a9d1f001348b35971d3eb3e737dba5 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 18:33:54 +0200 Subject: [PATCH 094/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'bscpkgs': 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs%2fheads%2fmaster&rev=c775ee4d6f76aded05b08ae13924c302f18f9b2c' (2023-04-26) → 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs%2fheads%2fmaster&rev=cbe9af5d042e9d5585fe2acef65a1347c68b2fbd' (2023-06-16) --- flake.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flake.lock b/flake.lock index a8e8e79..62f3468 100644 --- a/flake.lock +++ b/flake.lock @@ -23,11 +23,11 @@ }, "bscpkgs": { "locked": { - "lastModified": 1682521628, - "narHash": "sha256-uRIDCuJNt3rdikWiRcM3VPsQSk0vpQB1JO3Wx24psJo=", + "lastModified": 1686927936, + "narHash": "sha256-y9/R5OqDRFeq5kKRAsv9gge7vkeF/g1ImlbivpjYP/4=", "ref": "refs/heads/master", - "rev": "c775ee4d6f76aded05b08ae13924c302f18f9b2c", - "revCount": 807, + "rev": "cbe9af5d042e9d5585fe2acef65a1347c68b2fbd", + "revCount": 834, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, -- 2.49.0 From 0a06cf564b2986da299fc4da28672b9e96ec36e2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 16 Jun 2023 19:22:41 +0200 Subject: [PATCH 095/472] Add osumb to the system packages --- m/common/main.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/common/main.nix b/m/common/main.nix index ec3775e..f8622b1 100644 --- a/m/common/main.nix +++ b/m/common/main.nix 
@@ -35,6 +35,8 @@ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree ncdu + # From bsckgs overlay + bsc.osumb ]; systemd.services."serial-getty@ttyS0" = { -- 2.49.0 From 5e728773c3f452ccde7b2c25265944f5b8ec0700 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 20 Jun 2023 12:48:00 +0200 Subject: [PATCH 096/472] Add rpenacob user --- m/common/ssh.nix | 3 +++ m/common/users.nix | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/m/common/ssh.nix b/m/common/ssh.nix index 3e5bbd6..c09e3f3 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -28,6 +28,9 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" ]; + rpenacob.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" + ]; }; programs.ssh.knownHosts = { diff --git a/m/common/users.nix b/m/common/users.nix index be34db7..ce35487 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -23,6 +23,15 @@ extraGroups = [ "wheel" ]; hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; }; + + rpenacob = { + uid = 2761; + isNormalUser = true; + home = "/home/Computational/rpenacob"; + description = "Raúl Penacoba"; + group = "Computational"; + hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; + }; }; groups = { -- 2.49.0 From 85896f8546bd98095c0a8d43e82a9db2121be7df Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 20 Jun 2023 14:08:34 +0200 Subject: [PATCH 097/472] Move authorized keys to users.nix --- m/common/ssh.nix | 22 ---------------------- m/common/users.nix | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 23 
deletions(-) diff --git a/m/common/ssh.nix b/m/common/ssh.nix index c09e3f3..6dcc9f3 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -11,28 +11,6 @@ ProxyCommand nc -X connect -x localhost:23080 %h %p ''; - # Authorize keys - users.users = { - root.openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut" - ]; - rarias.openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" - ]; - arocanon.openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" - ]; - rpenacob.openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" - ]; - }; - programs.ssh.knownHosts = { "hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; diff --git a/m/common/users.nix b/m/common/users.nix index ce35487..3399ed3 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -4,6 +4,15 @@ users = { mutableUsers = false; users = { + # Generate hashedPassword with `mkpasswd -m sha-512` + + root.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO 
ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut" + ]; + rarias = { uid = 1880; isNormalUser = true; @@ -12,6 +21,11 @@ group = "Computational"; extraGroups = [ "wheel" ]; hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" + ]; }; arocanon = { @@ -22,15 +36,22 @@ group = "Computational"; extraGroups = [ "wheel" ]; hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" + ]; }; rpenacob = { uid = 2761; isNormalUser = true; home = "/home/Computational/rpenacob"; - description = "Raúl Penacoba"; + description = "Raúl Peñacoba"; group = "Computational"; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" + ]; }; }; -- 2.49.0 From 67a57cb3e58cb102d30373ddbb00eeab4fa9970a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 13:16:23 +0200 Subject: [PATCH 098/472] Allow srun to specify the cpu binding The task/affinity plugin needs 
to be selected. --- m/common/slurm.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index ce909df..08de3fd 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -9,6 +9,8 @@ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; + + # See slurm.conf(5) for more details about these options. extraConfig = '' # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but # not with Intel MPI. For that use the compatibility shim libpmi.so @@ -20,6 +22,13 @@ # When a node reboots return that node to the slurm queue as soon as it # becomes operative again. ReturnToService=2 + + # Track all processes by using a cgroup + ProctrackType=proctrack/cgroup + + # Enable task/affinity to allow the jobs to run in a specified subset of + # the resources. Use the task/cgroup plugin to enable process containment. + TaskPlugin=task/affinity,task/cgroup ''; }; } -- 2.49.0 From f78f4f582260f49df30b73148cefdb6e162fe0ee Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 15:41:06 +0200 Subject: [PATCH 099/472] Add perf to packages --- m/common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/main.nix b/m/common/main.nix index f8622b1..9a6e690 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -34,7 +34,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu + ncdu config.boot.kernelPackages.perf # From bsckgs overlay bsc.osumb ]; -- 2.49.0 From 1ec8d7a62504870c9c6ac05984594381cc2a7b14 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 16:23:16 +0200 Subject: [PATCH 100/472] Set perf paranoid to 0 by default --- m/common/boot.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/common/boot.nix b/m/common/boot.nix index 
ca18e0e..c93d5cc 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -18,6 +18,10 @@ "console=ttyS0,115200" ]; + boot.kernel.sysctl = { + "kernel.perf_event_paranoid" = lib.mkDefault 0; + }; + boot.kernelPackages = pkgs.linuxPackages_latest; #boot.kernelPatches = lib.singleton { -- 2.49.0 From adf1ff29a71dbbb4d4d5cabd7f3842a294b921a0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:01:27 +0200 Subject: [PATCH 101/472] Lower perf_event_paranoid to -1 --- m/common/boot.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/boot.nix b/m/common/boot.nix index c93d5cc..ba917da 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -19,7 +19,7 @@ ]; boot.kernel.sysctl = { - "kernel.perf_event_paranoid" = lib.mkDefault 0; + "kernel.perf_event_paranoid" = lib.mkDefault "-1"; }; boot.kernelPackages = pkgs.linuxPackages_latest; -- 2.49.0 From a38072762fffabc59ea020f547079795095df1ec Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:12:25 +0200 Subject: [PATCH 102/472] Add DNS tools to resolve hosts --- m/common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/main.nix b/m/common/main.nix index 9a6e690..e43ce4f 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -34,7 +34,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu config.boot.kernelPackages.perf + ncdu config.boot.kernelPackages.perf ldns # From bsckgs overlay bsc.osumb ]; -- 2.49.0 From 13e365002c9571d0c55f7b7484869983bde72427 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:22:18 +0200 Subject: [PATCH 103/472] Use our host names first by default --- m/common/net.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/m/common/net.nix b/m/common/net.nix index cfcd686..1173cc2 100644 --- 
a/m/common/net.nix +++ b/m/common/net.nix @@ -43,12 +43,12 @@ 10.0.40.143 oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) - 10.0.40.1 xeon01 xeon01-eth0 owl1 + 10.0.40.1 owl1 xeon01 xeon01-eth0 10.0.42.1 xeon01-ib0 10.0.40.101 xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) - 10.0.40.2 xeon02 xeon02-eth0 owl2 + 10.0.40.2 owl2 xeon02 xeon02-eth0 10.0.42.2 xeon02-ib0 10.0.40.102 xeon02-ipmi0 @@ -73,12 +73,12 @@ 10.0.40.106 xeon06-ipmi0 # Node Entry for node: xeon07 (ID=21) - 10.0.40.7 xeon07 xeon07-eth0 hut + 10.0.40.7 hut xeon07 xeon07-eth0 10.0.42.7 xeon07-ib0 10.0.40.107 xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) - 10.0.40.8 xeon08 xeon08-eth0 eudy + 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 xeon08-ib0 10.0.40.108 xeon08-ipmi0 ''; -- 2.49.0 From 9be15fdad2e50d395cfc0fd590fee9630a2f4169 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 30 Jun 2023 14:01:35 +0200 Subject: [PATCH 104/472] Add the ssfhead node as gateway --- m/common/net.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index 1173cc2..4970f40 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -8,7 +8,7 @@ networking = { enableIPv6 = false; useDHCP = false; - #defaultGateway = "10.0.40.30"; + defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; proxy = { default = "http://localhost:23080/"; -- 2.49.0 From d20fa359d9aa765a9e666f3fc53c12f344a37ed3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 30 Jun 2023 14:02:15 +0200 Subject: [PATCH 105/472] Enable NTP using the BSC time server --- m/common/main.nix | 1 + m/common/ntp.nix | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 m/common/ntp.nix diff --git a/m/common/main.nix b/m/common/main.nix index e43ce4f..7054495 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -6,6 +6,7 @@ ./fs.nix ./hw.nix ./net.nix + ./ntp.nix ./slurm.nix ./ssh.nix ./users.nix diff --git a/m/common/ntp.nix b/m/common/ntp.nix new file mode 100644 index 
0000000..d4ddb25 --- /dev/null +++ b/m/common/ntp.nix @@ -0,0 +1,9 @@ +{ pkgs, ... }: + +{ + services.ntp.enable = true; + + # Use the NTP server at BSC, as we don't have direct access + # to the outside world + networking.timeServers = [ "84.88.52.36" ]; +} -- 2.49.0 From c31bfd6b4d715a7d29dded199937123450549dce Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 29 Jun 2023 09:14:39 +0200 Subject: [PATCH 106/472] eudy: disable all cpu mitigations --- m/eudy/kernel/kernel.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/eudy/kernel/kernel.nix b/m/eudy/kernel/kernel.nix index 10bfead..17a580b 100644 --- a/m/eudy/kernel/kernel.nix +++ b/m/eudy/kernel/kernel.nix @@ -65,4 +65,9 @@ in { ./perf.nix ]; boot.kernelPackages = lib.mkForce kernel; + + # disable all cpu mitigations + boot.kernelParams = [ + "mitigations=off" + ]; } -- 2.49.0 From cfbfcdbe8cff8a3e58b423a4c537a220ed6b2adf Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 30 Jun 2023 12:49:44 +0200 Subject: [PATCH 107/472] eudy: Enable memory overcommit --- m/eudy/kernel/kernel.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/eudy/kernel/kernel.nix b/m/eudy/kernel/kernel.nix index 17a580b..3078fad 100644 --- a/m/eudy/kernel/kernel.nix +++ b/m/eudy/kernel/kernel.nix @@ -70,4 +70,8 @@ in { boot.kernelParams = [ "mitigations=off" ]; + + # enable memory overcommit, needed to build a taglibc system using nix after + # increasing the openblas memory footprint + boot.kernel.sysctl."vm.overcommit_memory" = 1; } -- 2.49.0 From e6bb6e735d70fb73e6ee218706c6288f455c5349 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Wed, 12 Jul 2023 13:22:42 +0200 Subject: [PATCH 108/472] eudy: Add fcsv3 and intermediate versions for testing --- m/eudy/kernel/kernel.nix | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/m/eudy/kernel/kernel.nix b/m/eudy/kernel/kernel.nix index 3078fad..016b8cb 100644 --- a/m/eudy/kernel/kernel.nix +++ 
b/m/eudy/kernel/kernel.nix @@ -21,14 +21,14 @@ let # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; #}; - kernel = nixos-fcsv2; + kernel = nixos-fcsv3; - nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { version = "6.2.8"; src = builtins.fetchGit { url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; rev = gitCommit; - ref = "fcs"; + ref = branch; }; structuredExtraConfig = with lib.kernel; { # add general custom kernel options here @@ -44,6 +44,17 @@ let nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; + nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";}; + + # always use fcs_sched_setaffinity + #nixos-debug = nixos-fcs-kernel {gitCommit = "7d0bf285fca92badc8df3c9907a9ab30db4418aa";}; + # remove need_check_cgroup + #nixos-debug = nixos-fcs-kernel {gitCommit = "4cc4efaab5e4a0bfa3089e935215b981c1922919";}; + # merge again fcs_wake and fcs_wait + #nixos-debug = nixos-fcs-kernel {gitCommit = "40c6f72f4ae54b0b636b193ac0648fb5730c810d";}; + # start from scratch, this is the working version with split fcs_wake and fcs_wait + nixos-debug = nixos-fcs-kernel {gitCommit = "c9a39d6a4ca83845b4e71fcc268fb0a76aff1bdf"; branch = "fcs-test"; }; + nixos-fcsv1-lockstat = nixos-fcs-kernel { gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; lockStat = true; @@ -52,8 +63,12 @@ let gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; lockStat = true; }; - nixos-fcsv2-lockstat-preempt = nixos-fcs-kernel { - gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; + nixos-fcsv3-lockstat = nixos-fcs-kernel { + gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; + lockStat = true; + }; + 
nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel { + gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; lockStat = true; preempt = true; }; -- 2.49.0 From 45ac6e95e95fc57f08f540839f78895d348ff929 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 21 Jul 2023 10:34:19 +0200 Subject: [PATCH 109/472] Add koro node --- flake.nix | 1 + m/common/net.nix | 2 +- m/common/ssh.nix | 1 + m/koro/configuration.nix | 37 +++++++++++++++++++++++ m/koro/kernel.nix | 64 ++++++++++++++++++++++++++++++++++++++++ m/koro/users.nix | 17 +++++++++++ 6 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 m/koro/configuration.nix create mode 100644 m/koro/kernel.nix create mode 100644 m/koro/users.nix diff --git a/flake.nix b/flake.nix index ebc935b..5f67afc 100644 --- a/flake.nix +++ b/flake.nix @@ -20,6 +20,7 @@ in owl1 = mkConf "owl1"; owl2 = mkConf "owl2"; eudy = mkConf "eudy"; + koro = mkConf "koro"; }; packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; diff --git a/m/common/net.nix b/m/common/net.nix index 4970f40..b2c09ca 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -63,7 +63,7 @@ 10.0.40.104 xeon04-ipmi0 # Node Entry for node: xeon05 (ID=19) - 10.0.40.5 xeon05 xeon05-eth0 + 10.0.40.5 koro xeon05 xeon05-eth0 10.0.42.5 xeon05-ib0 10.0.40.105 xeon05-ipmi0 diff --git a/m/common/ssh.nix b/m/common/ssh.nix index 6dcc9f3..2d805bf 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -16,6 +16,7 @@ "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; "owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK"; "eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG"; + "koro".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67"; "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; 
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix new file mode 100644 index 0000000..221a871 --- /dev/null +++ b/m/koro/configuration.nix @@ -0,0 +1,37 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/main.nix + #(modulesPath + "/installer/netboot/netboot-minimal.nix") + + ../eudy/cpufreq.nix + ../eudy/users.nix + ../eudy/slurm.nix + ./users.nix + ./kernel.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5376d2"; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; + + # members of the tracing group can use the lttng-provided kernel events + # without root permissions + users.groups.tracing.members = [ "arocanon" ]; + + # set up both ethernet and infiniband ips + networking = { + hostName = "koro"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.5"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.5"; + prefixLength = 24; + } ]; + }; +} diff --git a/m/koro/kernel.nix b/m/koro/kernel.nix new file mode 100644 index 0000000..016ac52 --- /dev/null +++ b/m/koro/kernel.nix @@ -0,0 +1,64 @@ +{ pkgs, lib, ... }: + +let + kernel = nixos-fcsv4; + + nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? 
"fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + version = "6.2.8"; + src = builtins.fetchGit { + url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + rev = gitCommit; + ref = branch; + }; + structuredExtraConfig = with lib.kernel; { + # add general custom kernel options here + } // lib.optionalAttrs lockStat { + LOCK_STAT = yes; + } // lib.optionalAttrs preempt { + PREEMPT = lib.mkForce yes; + PREEMPT_VOLUNTARY = lib.mkForce no; + }; + kernelPatches = []; + extraMeta.branch = lib.versions.majorMinor version; + }); + + nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; + nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; + nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";}; + nixos-fcsv4 = nixos-fcs-kernel {gitCommit = "c94c3d946f33ac3e5782a02ee002cc1164c0cb4f";}; + + nixos-fcsv1-lockstat = nixos-fcs-kernel { + gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; + lockStat = true; + }; + nixos-fcsv2-lockstat = nixos-fcs-kernel { + gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; + lockStat = true; + }; + nixos-fcsv3-lockstat = nixos-fcs-kernel { + gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; + lockStat = true; + }; + nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel { + gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; + lockStat = true; + preempt = true; + }; + latest = pkgs.linuxPackages_latest; + +in { + imports = [ + ../eudy/kernel/lttng.nix + ../eudy/kernel/perf.nix + ]; + boot.kernelPackages = lib.mkForce kernel; + + # disable all cpu mitigations + boot.kernelParams = [ + "mitigations=off" + ]; + + # enable memory overcommit, needed to build a taglibc system using nix after + # increasing the openblas memory footprint + boot.kernel.sysctl."vm.overcommit_memory" = lib.mkForce 1; +} diff --git a/m/koro/users.nix b/m/koro/users.nix new file mode 100644 index 0000000..4a4f794 --- /dev/null +++ b/m/koro/users.nix @@ 
-0,0 +1,17 @@ +{ ... }: + +{ + users.users = { + vlopez = { + uid = 4334; + isNormalUser = true; + home = "/home/Computational/vlopez"; + description = "Victor López"; + group = "Computational"; + hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" + ]; + }; + }; +} -- 2.49.0 From 312f2cb3688e90478dd742db812bba4abc6cc513 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 21 Jul 2023 10:34:37 +0200 Subject: [PATCH 110/472] koro: Add vlopez user --- m/koro/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix index 221a871..6623549 100644 --- a/m/koro/configuration.nix +++ b/m/koro/configuration.nix @@ -20,7 +20,7 @@ # members of the tracing group can use the lttng-provided kernel events # without root permissions - users.groups.tracing.members = [ "arocanon" ]; + users.groups.tracing.members = [ "arocanon" "vlopez" ]; # set up both ethernet and infiniband ips networking = { -- 2.49.0 From 544d5a3d695e37a18e1b43b15d3ef10ccce78195 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Jul 2023 14:24:21 +0200 Subject: [PATCH 111/472] Kill slurmd remaining processes on upgrade --- m/common/slurm.nix | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index 08de3fd..b02a914 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -1,6 +1,14 @@ -{ ... }: +{ lib, ... }: { + systemd.services.slurmd.serviceConfig = { + # Kill all processes in the control group on stop/restart. This will kill + # all the jobs running, so ensure that we only upgrade when the nodes are + # not in use. 
See: + # https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb + # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 + KillMode = lib.mkForce "control-group"; + }; services.slurm = { client.enable = true; controlMachine = "hut"; -- 2.49.0 From e8bab9928d9a0fc60f247bd4e1baabe2f6a52635 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Jul 2023 17:19:17 +0200 Subject: [PATCH 112/472] Upgrade flake: nixpkgs, bscpkgs and agenix --- flake.lock | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/flake.lock b/flake.lock index 62f3468..6c0a419 100644 --- a/flake.lock +++ b/flake.lock @@ -3,16 +3,17 @@ "agenix": { "inputs": { "darwin": "darwin", + "home-manager": "home-manager", "nixpkgs": [ "nixpkgs" ] }, "locked": { - "lastModified": 1682101079, - "narHash": "sha256-MdAhtjrLKnk2uiqun1FWABbKpLH090oeqCSiWemtuck=", + "lastModified": 1690228878, + "narHash": "sha256-9Xe7JV0krp4RJC9W9W9WutZVlw6BlHTFMiUP/k48LQY=", "owner": "ryantm", "repo": "agenix", - "rev": "2994d002dcff5353ca1ac48ec584c7f6589fe447", + "rev": "d8c973fd228949736dedf61b7f8cc1ece3236792", "type": "github" }, "original": { @@ -23,11 +24,11 @@ }, "bscpkgs": { "locked": { - "lastModified": 1686927936, - "narHash": "sha256-y9/R5OqDRFeq5kKRAsv9gge7vkeF/g1ImlbivpjYP/4=", + "lastModified": 1690380002, + "narHash": "sha256-7T1a46WMG/AfWP7zPVrrnjyqyfuUnjNZCdeeX0KM8WA=", "ref": "refs/heads/master", - "rev": "cbe9af5d042e9d5585fe2acef65a1347c68b2fbd", - "revCount": 834, + "rev": "976cdd5a4d98a4b772d35d9cdcc758bbd4eef1c6", + "revCount": 840, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, @@ -58,13 +59,34 @@ "type": "github" } }, + "home-manager": { + "inputs": { + "nixpkgs": [ + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1682203081, + "narHash": "sha256-kRL4ejWDhi0zph/FpebFYhzqlOBrk0Pl3dzGEKSAlEw=", + "owner": "nix-community", + "repo": "home-manager", + "rev": 
"32d3e39c491e2f91152c84f8ad8b003420eab0a1", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "home-manager", + "type": "github" + } + }, "nixpkgs": { "locked": { - "lastModified": 1682526928, - "narHash": "sha256-2cKh4O6t1rQ8Ok+v16URynmb0rV7oZPEbXkU0owNLQs=", + "lastModified": 1690272529, + "narHash": "sha256-MakzcKXEdv/I4qJUtq/k/eG+rVmyOZLnYNC2w1mB59Y=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "d6b863fd9b7bb962e6f9fdf292419a775e772891", + "rev": "ef99fa5c5ed624460217c31ac4271cfb5cb2502c", "type": "github" }, "original": { -- 2.49.0 From 07411beb495c1c3dc7b3549eeb5abc0ad784d966 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 27 Jul 2023 17:22:20 +0200 Subject: [PATCH 113/472] GRUB version no longer needed --- m/common/boot.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/common/boot.nix b/m/common/boot.nix index ba917da..8b71901 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -3,7 +3,6 @@ { # Use the GRUB 2 boot loader. boot.loader.grub.enable = lib.mkForce true; - boot.loader.grub.version = 2; # Enable GRUB2 serial console boot.loader.grub.extraConfig = '' -- 2.49.0 From e497e1b88bd5e5780a99cc095220c7a5494baff4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Jul 2023 13:48:30 +0200 Subject: [PATCH 114/472] Allow access to devices for node_exporter --- m/hut/monitoring.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index c55d366..7690724 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -23,6 +23,10 @@ systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.prometheus-ipmi-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce 
"read-only"; + virtualisation.docker.daemon.settings = { metrics-addr = "127.0.0.1:9323"; }; -- 2.49.0 From 8c14b75e44d02c065ac858faabcae94925609ac4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Jul 2023 14:24:51 +0200 Subject: [PATCH 115/472] Update nixpkgs to fix docker problem --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 6c0a419..956eefe 100644 --- a/flake.lock +++ b/flake.lock @@ -82,11 +82,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1690272529, - "narHash": "sha256-MakzcKXEdv/I4qJUtq/k/eG+rVmyOZLnYNC2w1mB59Y=", + "lastModified": 1690367991, + "narHash": "sha256-2VwOn1l8y6+cu7zjNE8MgeGJNNz1eat1HwHrINeogFA=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ef99fa5c5ed624460217c31ac4271cfb5cb2502c", + "rev": "c9cf0708f00fbe553319258e48ca89ff9a413703", "type": "github" }, "original": { -- 2.49.0 From f98af9aeef1d2529940006d6ff5b3a49725c2268 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Aug 2023 12:37:58 +0200 Subject: [PATCH 116/472] Don't set all_proxy --- m/common/net.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/net.nix b/m/common/net.nix index b2c09ca..9d6a28a 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -13,6 +13,9 @@ proxy = { default = "http://localhost:23080/"; noProxy = "127.0.0.1,localhost,internal.domain"; + # Don't set all_proxy as go complains and breaks the gitlab runner, see: + # https://github.com/golang/go/issues/16715 + allProxy = null; }; firewall = { -- 2.49.0 From 3424cac761d80d4587334e1e085f2267c30f1d97 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Jul 2023 16:19:59 +0200 Subject: [PATCH 117/472] Increase prometheus retention time to one year --- m/hut/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 7690724..d68fe3c 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -18,6 +18,7 @@ services.prometheus 
= { enable = true; port = 9001; + retentionTime = "1y"; }; systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false; -- 2.49.0 From 1622b3e7fcc2b7feb7968cd70c298ffb6130be7f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Aug 2023 18:55:40 +0200 Subject: [PATCH 118/472] Monitor power from other nodes via LAN --- m/hut/ipmi.yml | 13 ++++++++++ m/hut/monitoring.nix | 62 +++++++++++++++++++++++++++++++++++++++++--- m/hut/targets.yml | 11 ++++++++ 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 m/hut/ipmi.yml create mode 100644 m/hut/targets.yml diff --git a/m/hut/ipmi.yml b/m/hut/ipmi.yml new file mode 100644 index 0000000..0d68a53 --- /dev/null +++ b/m/hut/ipmi.yml @@ -0,0 +1,13 @@ +modules: + default: + collectors: + - bmc + - ipmi + - chassis + + lan: + collectors: + - ipmi + - chassis + user: "" + pass: "" diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index d68fe3c..a00fb9b 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -42,9 +42,13 @@ services.prometheus = { exporters = { - ipmi.enable = true; - ipmi.group = "root"; - ipmi.user = "root"; + ipmi = { + enable = true; + group = "root"; + user = "root"; + configFile = ./ipmi.yml; + #extraFlags = [ "--log.level=debug" ]; + }; node = { enable = true; enabledCollectors = [ "systemd" ]; @@ -66,6 +70,58 @@ ]; }]; } + { + # Scrape the IPMI info of the hosts remotely via LAN + job_name = "ipmi-lan"; + scrape_interval = "1m"; + scrape_timeout = "30s"; + metrics_path = "/ipmi"; + scheme = "http"; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + separator = ";"; + regex = "(.*)(:80)?"; + target_label = "__param_target"; + replacement = "\${1}"; + action = "replace"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + separator = ";"; + regex = "(.*)"; + target_label = "instance"; + 
replacement = "\${1}"; + action = "replace"; + } + { + # Sets the fixed "module=lan" URL param + separator = ";"; + regex = "(.*)"; + target_label = "__param_module"; + replacement = "lan"; + action = "replace"; + } + { + # Sets the target to query as the localhost IPMI exporter + separator = ";"; + regex = ".*"; + target_label = "__address__"; + replacement = "127.0.0.1:9290"; + action = "replace"; + } + ]; + + # Load the list of targets from another file + file_sd_configs = [ + { + files = [ "${./targets.yml}" ]; + refresh_interval = "30s"; + } + ]; + } ]; }; } diff --git a/m/hut/targets.yml b/m/hut/targets.yml new file mode 100644 index 0000000..2cecd66 --- /dev/null +++ b/m/hut/targets.yml @@ -0,0 +1,11 @@ +- targets: + - 10.0.40.101 + - 10.0.40.102 + - 10.0.40.103 + - 10.0.40.104 + - 10.0.40.105 + - 10.0.40.106 + - 10.0.40.107 + - 10.0.40.108 + labels: + job: ipmi-lan -- 2.49.0 From a7e09e55df3996edd6fe3753fd6e2590f91f212e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Aug 2023 10:28:26 +0200 Subject: [PATCH 119/472] Update flake --- flake.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flake.lock b/flake.lock index 956eefe..1a3da19 100644 --- a/flake.lock +++ b/flake.lock @@ -24,11 +24,11 @@ }, "bscpkgs": { "locked": { - "lastModified": 1690380002, - "narHash": "sha256-7T1a46WMG/AfWP7zPVrrnjyqyfuUnjNZCdeeX0KM8WA=", + "lastModified": 1690560045, + "narHash": "sha256-39ZP+FIzlWoN3c43hReBYpStg4RLYw/z7TdxCQmOvTM=", "ref": "refs/heads/master", - "rev": "976cdd5a4d98a4b772d35d9cdcc758bbd4eef1c6", - "revCount": 840, + "rev": "b4a20d7c3af854b39682484adfd1c7979319f439", + "revCount": 841, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, @@ -82,11 +82,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1690367991, - "narHash": "sha256-2VwOn1l8y6+cu7zjNE8MgeGJNNz1eat1HwHrINeogFA=", + "lastModified": 1692447944, + "narHash": "sha256-fkJGNjEmTPvqBs215EQU4r9ivecV5Qge5cF/QDLVn3U=", "owner": 
"NixOS", "repo": "nixpkgs", - "rev": "c9cf0708f00fbe553319258e48ca89ff9a413703", + "rev": "d680ded26da5cf104dd2735a51e88d2d8f487b4d", "type": "github" }, "original": { -- 2.49.0 From 0ce574800ed26a31af0eb4337e23e3b97dbaec02 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Jul 2023 19:49:48 +0200 Subject: [PATCH 120/472] Add bay node --- flake.nix | 1 + m/bay/configuration.nix | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 m/bay/configuration.nix diff --git a/flake.nix b/flake.nix index 5f67afc..6ce0689 100644 --- a/flake.nix +++ b/flake.nix @@ -21,6 +21,7 @@ in owl2 = mkConf "owl2"; eudy = mkConf "eudy"; koro = mkConf "koro"; + bay = mkConf "bay"; }; packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix new file mode 100644 index 0000000..04965fd --- /dev/null +++ b/m/bay/configuration.nix @@ -0,0 +1,26 @@ +{ config, pkgs, modulesPath, ... }: + +{ + imports = [ + (modulesPath + "/installer/netboot/netboot-minimal.nix") + ../common/main.nix + ]; + + services.openssh.settings.X11Forwarding = false; + nixpkgs.config.allowBroken = true; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; + + networking = { + hostName = "bay"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.40"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.40"; + prefixLength = 24; + } ]; + }; +} -- 2.49.0 From 0b55ce3d027d58e04096821aa1bbd245502a1466 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Jul 2023 20:31:44 +0200 Subject: [PATCH 121/472] Remove netboot and fixes --- m/bay/configuration.nix | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 04965fd..b42abf1 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -1,14 +1,10 @@ -{ config, 
pkgs, modulesPath, ... }: +{ config, pkgs, ... }: { imports = [ - (modulesPath + "/installer/netboot/netboot-minimal.nix") ../common/main.nix ]; - services.openssh.settings.X11Forwarding = false; - nixpkgs.config.allowBroken = true; - # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; -- 2.49.0 From d7a442020503ad52b4252d1eacb8521e8ee53114 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Aug 2023 15:56:09 +0200 Subject: [PATCH 122/472] Add the bay host name --- m/common/net.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index 9d6a28a..354fa5d 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -31,7 +31,7 @@ 84.88.53.236 ssfhead.bsc.es ssfhead # Node Entry for node: mds01 (ID=72) - 10.0.40.40 mds01 mds01-eth0 + 10.0.40.40 bay mds01 mds01-eth0 10.0.42.40 mds01-ib0 10.0.40.141 mds01-ipmi0 -- 2.49.0 From a355926cf08183456d61e2f9e8b644dca31272d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Aug 2023 15:57:25 +0200 Subject: [PATCH 123/472] Add ceph config in bay --- m/bay/configuration.nix | 68 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index b42abf1..847e162 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -1,4 +1,4 @@ -{ config, pkgs, ... }: +{ config, pkgs, lib, ... 
}: { imports = [ @@ -8,6 +8,14 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; + environment.systemPackages = with pkgs; [ + ceph + ]; + + services.slurm = { + client.enable = lib.mkForce false; + }; + networking = { hostName = "bay"; interfaces.eno1.ipv4.addresses = [ { @@ -19,4 +27,62 @@ prefixLength = 24; } ]; }; + + services.ceph = { + enable = true; + global = { + fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b"; + monHost = "10.0.40.40"; + monInitialMembers = "bay"; + clusterNetwork = "10.0.40.40/24"; # Use Ethernet only + }; + mds = { + enable = true; + daemons = [ "mds0" "mds1" ]; + extraConfig = { + "host" = "bay"; + }; + }; + mgr = { + enable = true; + daemons = [ "bay" ]; + }; + mon = { + enable = true; + daemons = [ "bay" ]; + }; + osd = { + enable = true; + # One daemon per NVME disk + daemons = [ "0" "1" "2" "3" ]; + extraConfig = { + "osd crush chooseleaf type" = "0"; + "osd journal size" = "10000"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd pool default size" = "3"; + }; + }; + }; + + # Missing service for volumes, see: + # https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/ + systemd.services.ceph-volume = { + enable = true; + description = "Ceph Volume activation"; + unitConfig = { + Type = "oneshot"; + After = "local-fs.target"; + Wants = "local-fs.target"; + }; + path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ]; + serviceConfig = { + KillMode = "none"; + Environment = "CEPH_VOLUME_TIMEOUT=10000"; + ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'"; + TimeoutSec = "0"; + }; + wantedBy = [ "multi-user.target" ]; + }; } -- 2.49.0 From a5fae4a2899cbe43e1ba20bb1cd3859090bf83f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Aug 2023 15:57:49 +0200 Subject: [PATCH 124/472] Mount the ceph filesystem 
in hut --- m/hut/ceph.nix | 14 ++++++++++++++ m/hut/configuration.nix | 1 + 2 files changed, 15 insertions(+) create mode 100644 m/hut/ceph.nix diff --git a/m/hut/ceph.nix b/m/hut/ceph.nix new file mode 100644 index 0000000..fab5493 --- /dev/null +++ b/m/hut/ceph.nix @@ -0,0 +1,14 @@ +{ pkgs, ... }: + +{ + environment.systemPackages = [ pkgs.ceph-client ]; + + # We need the ceph module loaded as the mount.ceph binary fails to run the + # modprobe command. + boot.kernelModules = [ "ceph" ]; + + fileSystems."/ceph" = { + fsType = "ceph"; + device = "animal@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; + }; +} diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 82d4c34..8e677d0 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -8,6 +8,7 @@ ./monitoring.nix ./nfs.nix ./slurm-daemon.nix + ./ceph.nix agenix.nixosModules.default ]; -- 2.49.0 From d54dcc8d8f391394c5d4e3999321b4f90b9bd258 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Aug 2023 16:33:55 +0200 Subject: [PATCH 125/472] Add ceph metrics to prometheus --- m/common/net.nix | 2 +- m/hut/monitoring.nix | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index 354fa5d..de2b28d 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -12,7 +12,7 @@ nameservers = ["8.8.8.8"]; proxy = { default = "http://localhost:23080/"; - noProxy = "127.0.0.1,localhost,internal.domain"; + noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40"; # Don't set all_proxy as go complains and breaks the gitlab runner, see: # https://github.com/golang/go/issues/16715 allProxy = null; diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index a00fb9b..4bc3d24 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -67,6 +67,7 @@ "127.0.0.1:9323" "127.0.0.1:9252" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" + "10.0.40.40:9283" # Ceph ]; }]; } -- 2.49.0 From 
d81d9d58e1e03c9c0eaa17da94a13eaaef03c8c4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 Aug 2023 17:15:26 +0200 Subject: [PATCH 126/472] Add rarias key for secrets --- m/hut/secrets.nix | 11 ++++++----- m/hut/secrets/nosv-token.age | Bin 501 -> 541 bytes m/hut/secrets/ovni-token.age | Bin 538 -> 610 bytes 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/m/hut/secrets.nix b/m/hut/secrets.nix index 7c5aea7..2c4d823 100644 --- a/m/hut/secrets.nix +++ b/m/hut/secrets.nix @@ -1,9 +1,10 @@ let - root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb"; - system = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; - systems = [ root system ]; + rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; + root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb"; + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; + default = [ rarias root hut ]; in { - "secrets/ovni-token.age".publicKeys = systems; - "secrets/nosv-token.age".publicKeys = systems; + "secrets/ovni-token.age".publicKeys = default; + "secrets/nosv-token.age".publicKeys = default; } diff --git a/m/hut/secrets/nosv-token.age b/m/hut/secrets/nosv-token.age index b26d482ab5e0522be3058a61ea400bb213e6c327..4b495a944de69fccbfdde7394b7f63f80e53e06e 100644 GIT binary patch delta 514 zcmey$JeOsHYQ3{#xVuT9f}5ead10DsdVyzTvU`xXUr4Y~L8`t{ZjfcLnL&1TXq9(p zVt}`Waan{TSF(RtRd#8%bEu_-YeZ?1t9MzBzkZsJXIgPlMoF=^Q*ohXfV;ViX{NR> zm#&>cadC!jYKoDmsiCEUZ?KzjO1eUZe^{=$p+R_RaecCRdQ?DAv5T*1iDQ~}fpK|y zg>$Z1aAaw@VSZ6iYL*38NpYTUWPU)BvtfF9UU+syk$I`JxnX%xN@|6FgkfcBSw*mq zkBhmnr%yh*bsmvMl|ktWp*iNpnMv+mg`wu9MS(t1Mfw(%K8D7Dd3k~LX1TeRA(fVu z6_pkFp@Bx8T$zSdkr}y>&gQ21RqmChh2bU{B|*uBi5W@G$?lbgreUc8J|;#%5#gn- zAnQ`KobI+e^0KspQr%0O(yG&3T}`+;?IzsiWH9=*X?EV$UukFe^kmq%PSDBnQT~2$>#r@E 
z$KTG`tn%!f`c~iB^7V->N8WeLik{TcTlJ>s(r($31^%*rsqr28rdrC@@|j1OFR#dX sx$&zv>yEYSHkwa5uJ%XxTf@tbS9P{LY&lcKx9OpM;+I)DS{{nu0ImnTuK)l5 delta 454 zcmbQs@|AglYMgJdn{i6If~T3jc4T>Zp>buEPnJt*kWorxL|M91VQO}Ec0r1NVqkD! zSZb74rBOvTmuXIbyMLgEQJ%N4d75udQd)?oQDT;3phs13QMzMxU}Tp0B~Bs6sb1lxj(OguC1w$3&Ut>uW*MPeDF#*9 zK9)gYmJxnFAz{gmDgK_WZXu}|-XTFHfhIXso(93%L8V3d`DV#nx^@bwe#t4p?#8<5 zMX8C!sS2v4nM#qBhAv!D6$<`sS|n>7J?DPHBb_CBgYc#u150QCWGZrukl$ z<{6fWZdnykghNTwJ=kx(db>0g*l~?&-O{-ubz?j;0|M zxsJZ&>3-op7J&v~mf3~cWtQ6IK9ObR`CLl>HPeO97Z&tX#2pj)eX#H)L&&G<&Q=+Q z*qaaLu^l)oB7E=p4b^GPZNI&Z4zu5%7a@5+I`-+jg}Lpg&u*VIf4{=#i$V+D9QaW^ zLHFQ^xpIC=ZBJGvmT!}MDCOOqe(bkFqpax+#;0Cp(KpKKS-zFUu=T{cadC!jYKoDmsiCEUZ?KzjO1grfnU8xwR#1qMPrZw;kEv6lk5OqxXqscBYk+C4 zYpS-snX^Z3PH9z+b7d%(ucy09WtvlpcDQS9d7g1mc7~y)VW69Lh>=rfxnF9&S&&Dt zL1LbzMUe@*bsmvMl|ktW`i?1?d6`D#1|j~&S(cGGuC4(k0XZfvjwV6%?j=!C5tTmq zrsbLGF2y0qTuHeF>4n8bc^L&37GVKJ*-=G>VVO~7md1ge0YMQd5ndKKNs&QCnMFo{ zAnQD}0%M$X(~D9Qi&GWMt&%JiObsmqqOGjBf>J_KlEZS#O7paXybZ&GjNH7cT+J#x z^Gr=#a?HYf3ca(l^6D!SGrV)OP25VO9J6y1i!(hdeG*ghxpZ}P6?{Y89QB=xg9AL= zyezUak~~Xut8y(feFEIfi!4hrD^fD^L&EY>6D^!wxmHDWh_5cAV3J?plH3D3K>J^|rsZC9{}%|JMEK7+dOj((LY{?p^z| z+C1YNrDm$ru~hNy I%u2Qx0L$>o^#A|> delta 492 zcmaFFGK*z`YMgJdn{i6If@fJyN@;ebaY$B`b4f;|WssA;SCWCYZ+>Znky~WAfmuer zUrL~MnuVbSm!EfDX<$)Epi@Lvh+|QNyP3YJcbI8%O1i$KS7^3=j&WI7N@SryT71L6sUfI4}POhmT z87|pbX^GyJ!KR58h91e4iBV;iRryKYj(%aju9>+hrOEo9{-!2ex^@a`sZOf3mAdIg zsfop@3i;N05s?a!nMG9!wGkOHF78~;**-z_&N+qQLD|kl=6M$RrlDb$+Sw`2nb}5} zA@2Gqo`%I{W@gUb$>|kIjsf`@xkZ&;RsP;7=|Mh~RnBP!T+YD(zM*MFnH9OEfhDEE z8JX!Nxshf8xkip$y1Kdw?yjYNzIowM#@bG1fqoHX=EdeoX Date: Wed, 23 Aug 2023 17:18:17 +0200 Subject: [PATCH 127/472] Store ceph secret key in age This allows a node to mount the ceph FS without any extra ceph configuration in /etc/ceph. 
--- m/hut/ceph.nix | 10 ++++++++-- m/hut/secrets.nix | 1 + m/hut/secrets/ceph-user.age | 11 +++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 m/hut/secrets/ceph-user.age diff --git a/m/hut/ceph.nix b/m/hut/ceph.nix index fab5493..722466f 100644 --- a/m/hut/ceph.nix +++ b/m/hut/ceph.nix @@ -1,4 +1,4 @@ -{ pkgs, ... }: +{ config, pkgs, ... }: { environment.systemPackages = [ pkgs.ceph-client ]; @@ -7,8 +7,14 @@ # modprobe command. boot.kernelModules = [ "ceph" ]; + age.secrets."secrets/ceph-user".file = ./secrets/ceph-user.age; + fileSystems."/ceph" = { fsType = "ceph"; - device = "animal@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; + device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; + options = [ + "mon_addr=10.0.40.40" + "secretfile=${config.age.secrets."secrets/ceph-user".path}" + ]; }; } diff --git a/m/hut/secrets.nix b/m/hut/secrets.nix index 2c4d823..d22add6 100644 --- a/m/hut/secrets.nix +++ b/m/hut/secrets.nix @@ -7,4 +7,5 @@ in { "secrets/ovni-token.age".publicKeys = default; "secrets/nosv-token.age".publicKeys = default; + "secrets/ceph-user.age".publicKeys = default; } diff --git a/m/hut/secrets/ceph-user.age b/m/hut/secrets/ceph-user.age new file mode 100644 index 0000000..735afca --- /dev/null +++ b/m/hut/secrets/ceph-user.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> ssh-ed25519 CAWG4Q 35Ak+Mep9k5KnDLF1ywDbMD4l4mRFg6D0et19tqXxAw +Wgr+CX4rzrPmUszSidtLAVSvgD80F2dqtd92hGZIFwo +-> ssh-ed25519 MSF3dg OVFvpkAyWTowtxsafstX31H/hJpNZmnOCbvqMIN0+AQ +VxjRcQmp+BadEh2y0PB96EeizIl3tTQpVu0CWHmsc1s +-> ssh-ed25519 HY2yRg MJSQIpre9m0XnojgXuKQ/+hVBZNrZNGZqplwhqicpjI +CLkE52iqpoqSnbzisNjQgxTfNqKeaRl5ntcw1d+ZDyQ +-> m$8`De%~-grease '85p}`by +52zMpprONcawWDDtzHdWNwFoYXErPUnVjhSONbUBpDlqAmJmD1LcAnsU +--- 0vZOPyXQIMMGTwgFfvm8Sn8O7vjrsjGUEy5m/BASCyc +|)*_DUS`r sN[֌^e+A 1G.#mW 5 ( \ No newline at end of file -- 2.49.0 From 832866cbfab48a39bdaca33209fac1dfa2e0f2e2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 
Aug 2023 17:42:50 +0200 Subject: [PATCH 128/472] Add agenix to PATH in hut --- m/hut/configuration.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 8e677d0..14c7881 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -12,6 +12,10 @@ agenix.nixosModules.default ]; + environment.systemPackages = [ + agenix.packages.x86_64-linux.default + ]; + # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; -- 2.49.0 From 4c806b8ae974178633160dc0c503e521e187f947 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Aug 2023 12:29:44 +0200 Subject: [PATCH 129/472] Add section to enable serial console --- doc/install.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/install.md b/doc/install.md index 8eae8c7..d5e279d 100644 --- a/doc/install.md +++ b/doc/install.md @@ -3,6 +3,34 @@ This article shows the steps to install NixOS in a node following the configuration of the repo. +## Enable the serial console + +By default, the nodes have the serial console disabled in the GRUB and also boot +without the serial enabled. + +To enable the serial console in the GRUB, set in /etc/default/grub the following +lines: + +``` +GRUB_TERMINAL="console serial" +GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1" +``` + +To boot Linux with the serial enabled, so you can see the boot log and login via +serial set: + +``` +GRUB_CMDLINE_LINUX="console=ttyS0,115200n8 console=tty0" +``` + +Then update the grub config: + +``` +# grub2-mkconfig -o /boot/grub2/grub.cfg +``` + +And reboot. + ## Prepare the disk Create a main partition and label it `nixos` following [the manual][1]. 
-- 2.49.0 From 3276f54e867025d188177efae75a255a54ae0b0e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Aug 2023 12:30:46 +0200 Subject: [PATCH 130/472] Add lake2 bootstrap config --- flake.nix | 1 + m/lake2/configuration.nix | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 m/lake2/configuration.nix diff --git a/flake.nix b/flake.nix index 6ce0689..7bd69c0 100644 --- a/flake.nix +++ b/flake.nix @@ -22,6 +22,7 @@ in eudy = mkConf "eudy"; koro = mkConf "koro"; bay = mkConf "bay"; + lake2 = mkConf "lake2"; }; packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix new file mode 100644 index 0000000..37bd22a --- /dev/null +++ b/m/lake2/configuration.nix @@ -0,0 +1,32 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/main.nix + (modulesPath + "/installer/netboot/netboot-minimal.nix") + ]; + + # For now we install NixOS in the first nvme disk (nvme0n1), as this node only + # has one SSD already used for SUSE. + boot.loader.grub.device = "/dev/disk/by-id/nvme-SNVMe_INTEL_SSDPE2MD02CVFT5281004L2P0KGN"; + + environment.systemPackages = with pkgs; [ + ceph + ]; + + services.slurm = { + client.enable = lib.mkForce false; + }; + + networking = { + hostName = "lake2"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.42"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.42"; + prefixLength = 24; + } ]; + }; +} -- 2.49.0 From 394c7ecd7bc40f040a805b57181eb6c8e4e6724d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Aug 2023 13:54:22 +0200 Subject: [PATCH 131/472] Prepare lake2 config after bootstrap The disk ID is different under NixOS. 
--- m/lake2/configuration.nix | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 37bd22a..1b89226 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -3,12 +3,11 @@ { imports = [ ../common/main.nix - (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; # For now we install NixOS in the first nvme disk (nvme0n1), as this node only # has one SSD already used for SUSE. - boot.loader.grub.device = "/dev/disk/by-id/nvme-SNVMe_INTEL_SSDPE2MD02CVFT5281004L2P0KGN"; + boot.loader.grub.device = "/dev/disk/by-id/nvme-INTEL_SSDPE2MD020T4_CVFT5281004L2P0KGN"; environment.systemPackages = with pkgs; [ ceph -- 2.49.0 From fb1744306dc57a950867f6381bd07c1a8641448d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Aug 2023 15:27:37 +0200 Subject: [PATCH 132/472] Specify the disk by path --- m/lake2/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 1b89226..99fffa3 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -7,7 +7,7 @@ # For now we install NixOS in the first nvme disk (nvme0n1), as this node only # has one SSD already used for SUSE. 
- boot.loader.grub.device = "/dev/disk/by-id/nvme-INTEL_SSDPE2MD020T4_CVFT5281004L2P0KGN"; + boot.loader.grub.device = "/dev/disk/by-path/pci-0000:83:00.0-nvme-1"; environment.systemPackages = with pkgs; [ ceph -- 2.49.0 From 6c0c26b3aa4dc8ea3582a33690ad668cca8182b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Aug 2023 19:08:23 +0200 Subject: [PATCH 133/472] Enable netboot again for PXE --- m/lake2/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 99fffa3..618acfc 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; # For now we install NixOS in the first nvme disk (nvme0n1), as this node only -- 2.49.0 From 4b78ec91345abf0f832feafae16449daec6c1c2a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 12:03:30 +0200 Subject: [PATCH 134/472] Add PXE helper --- m/hut/configuration.nix | 1 + m/hut/pxe.nix | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 m/hut/pxe.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 14c7881..1356780 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -9,6 +9,7 @@ ./nfs.nix ./slurm-daemon.nix ./ceph.nix + ./pxe.nix agenix.nixosModules.default ]; diff --git a/m/hut/pxe.nix b/m/hut/pxe.nix new file mode 100644 index 0000000..65e6fb4 --- /dev/null +++ b/m/hut/pxe.nix @@ -0,0 +1,26 @@ +{ theFlake, pkgs, ... }: + +# This module describes a script that can launch the pixiecore daemon to serve a +# NixOS image via PXE to a node to directly boot from there, without requiring a +# working disk. 
+ +let + # The host config must have the netboot-minimal.nix module too + host = theFlake.nixosConfigurations.lake2; + sys = host.config.system; + build = sys.build; + kernel = "${build.kernel}/bzImage"; + initrd = "${build.netbootRamdisk}/initrd"; + init = "${build.toplevel}/init"; + + script = pkgs.writeShellScriptBin "pixiecore-helper" '' + #!/usr/bin/env bash -x + + ${pkgs.pixiecore}/bin/pixiecore \ + boot ${kernel} ${initrd} --cmdline "init=${init} loglevel=4" \ + --debug --dhcp-no-bind --port 64172 --status-port 64172 "$@" + ''; +in +{ + environment.systemPackages = [ script ]; +} -- 2.49.0 From f18f1937aea55c57eda7ea6da54a22cb4d5d2230 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 13:21:00 +0200 Subject: [PATCH 135/472] Disable pixiecore in hut for now --- m/hut/configuration.nix | 2 +- m/hut/pxe.nix | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 1356780..b4ba36f 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -9,7 +9,7 @@ ./nfs.nix ./slurm-daemon.nix ./ceph.nix - ./pxe.nix + #./pxe.nix agenix.nixosModules.default ]; diff --git a/m/hut/pxe.nix b/m/hut/pxe.nix index 65e6fb4..e3a74e2 100644 --- a/m/hut/pxe.nix +++ b/m/hut/pxe.nix @@ -22,5 +22,14 @@ let ''; in { + ## We need a DHCP server to provide the IP + #services.dnsmasq = { + # enable = true; + # settings = { + # domain-needed = true; + # dhcp-range = [ "192.168.0.2,192.168.0.254" ]; + # }; + #}; + environment.systemPackages = [ script ]; } -- 2.49.0 From 0b22a1b8a45df3a13ad40022f8261c6b255354ce Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 13:39:01 +0200 Subject: [PATCH 136/472] Remove netboot module --- m/lake2/configuration.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 618acfc..99fffa3 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -3,7 +3,6 @@ { 
imports = [ ../common/main.nix - (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; # For now we install NixOS in the first nvme disk (nvme0n1), as this node only -- 2.49.0 From be4187de3cd410d07ec0eadb483612c9714dc112 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 13:40:10 +0200 Subject: [PATCH 137/472] Use the sda for lake2 --- m/lake2/configuration.nix | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 99fffa3..255a53a 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -5,9 +5,7 @@ ../common/main.nix ]; - # For now we install NixOS in the first nvme disk (nvme0n1), as this node only - # has one SSD already used for SUSE. - boot.loader.grub.device = "/dev/disk/by-path/pci-0000:83:00.0-nvme-1"; + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; environment.systemPackages = with pkgs; [ ceph -- 2.49.0 From 6c656182f1950e2f73d65f9f304b232ba47fa754 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 14:44:35 +0200 Subject: [PATCH 138/472] Add the lake2 hostname to the hosts --- m/common/net.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index de2b28d..09765c4 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -41,7 +41,7 @@ 10.0.40.142 oss01-ipmi0 # Node Entry for node: oss02 (ID=74) - 10.0.40.42 oss02 oss02-eth0 + 10.0.40.42 lake2 oss02 oss02-eth0 10.0.42.42 oss02-ib0 10.0.40.143 oss02-ipmi0 -- 2.49.0 From 0f54d63a462b581b9f1a8bc91a36a30ad553b471 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 14:44:53 +0200 Subject: [PATCH 139/472] Enable ceph osd daemons in lake2 --- m/lake2/configuration.nix | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 255a53a..c8225db 100644 --- a/m/lake2/configuration.nix +++ 
b/m/lake2/configuration.nix @@ -15,6 +15,29 @@ client.enable = lib.mkForce false; }; + services.ceph = { + enable = true; + global = { + fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b"; + monHost = "10.0.40.40"; + monInitialMembers = "bay"; + clusterNetwork = "10.0.40.40/24"; # Use Ethernet only + }; + osd = { + enable = true; + # One daemon per NVME disk + daemons = [ "4" "5" "6" "7" ]; + extraConfig = { + "osd crush chooseleaf type" = "0"; + "osd journal size" = "10000"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd pool default size" = "3"; + }; + }; + }; + networking = { hostName = "lake2"; interfaces.eno1.ipv4.addresses = [ { -- 2.49.0 From b4015ded86f852c7b3a02611d1af9796b535424e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 18:12:00 +0200 Subject: [PATCH 140/472] Move pkgs overlay to overlay.nix --- m/common/main.nix | 2 +- pkgs/{mpi.nix => overlay.nix} | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) rename pkgs/{mpi.nix => overlay.nix} (91%) diff --git a/m/common/main.nix b/m/common/main.nix index 7054495..5d77ddc 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -14,7 +14,7 @@ nixpkgs.overlays = [ bscpkgs.bscOverlay - (import ../../pkgs/mpi.nix) + (import ../../pkgs/overlay.nix) ]; nix.nixPath = [ diff --git a/pkgs/mpi.nix b/pkgs/overlay.nix similarity index 91% rename from pkgs/mpi.nix rename to pkgs/overlay.nix index cdaeadd..b86d2d9 100644 --- a/pkgs/mpi.nix +++ b/pkgs/overlay.nix @@ -1,7 +1,10 @@ final: prev: { bsc = prev.bsc.extend (bscFinal: bscPrev: { + # Set MPICH as default mpi = bscFinal.mpich; + + # Configure the network for MPICH mpich = with final; prev.mpich.overrideAttrs (old: { buildInput = old.buildInputs ++ [ libfabric -- 2.49.0 From 8912d2b9bc66a2845edf587f1769456c4537ad94 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 25 Aug 2023 18:12:46 +0200 Subject: [PATCH 141/472] Update ceph to 18.2.0 in overlay --- 
pkgs/ceph.nix | 405 +++++++++++++++++++++++++++++++++++++++++++++++ pkgs/overlay.nix | 7 + 2 files changed, 412 insertions(+) create mode 100644 pkgs/ceph.nix diff --git a/pkgs/ceph.nix b/pkgs/ceph.nix new file mode 100644 index 0000000..5247862 --- /dev/null +++ b/pkgs/ceph.nix @@ -0,0 +1,405 @@ +{ lib +, stdenv +, runCommand +, fetchurl +, fetchFromGitHub +, fetchPypi + +# Build time +, cmake +, ensureNewerSourcesHook +, fmt +, git +, makeWrapper +, nasm +, pkg-config +, which + +# Tests +, nixosTests + +# Runtime dependencies +, arrow-cpp +, babeltrace +, boost179 +, bzip2 +, cryptsetup +, cunit +, doxygen +, gperf +, graphviz +, gtest +, icu +, libcap +, libcap_ng +, libnl +, libxml2 +, lttng-ust +, lua +, lz4 +, oath-toolkit +, openldap +, python310 +, rdkafka +, rocksdb +, snappy +, sqlite +, utf8proc +, zlib +, zstd + +# Optional Dependencies +, curl ? null +, expat ? null +, fuse ? null +, libatomic_ops ? null +, libedit ? null +, libs3 ? null +, yasm ? null + +# Mallocs +, gperftools ? null +, jemalloc ? null + +# Crypto Dependencies +, cryptopp ? null +, nspr ? null +, nss ? null + +# Linux Only Dependencies +, linuxHeaders +, util-linux +, libuuid +, udev +, keyutils +, rdma-core +, rabbitmq-c +, libaio ? null +, libxfs ? null +, liburing ? null +, zfs ? null +, ... 
+}: + +# We must have one crypto library +assert cryptopp != null || (nss != null && nspr != null); + +let + shouldUsePkg = pkg: if pkg != null && pkg.meta.available then pkg else null; + + optYasm = shouldUsePkg yasm; + optExpat = shouldUsePkg expat; + optCurl = shouldUsePkg curl; + optFuse = shouldUsePkg fuse; + optLibedit = shouldUsePkg libedit; + optLibatomic_ops = shouldUsePkg libatomic_ops; + optLibs3 = shouldUsePkg libs3; + + optJemalloc = shouldUsePkg jemalloc; + optGperftools = shouldUsePkg gperftools; + + optCryptopp = shouldUsePkg cryptopp; + optNss = shouldUsePkg nss; + optNspr = shouldUsePkg nspr; + + optLibaio = shouldUsePkg libaio; + optLibxfs = shouldUsePkg libxfs; + optZfs = shouldUsePkg zfs; + + # Downgrade rocksdb, 7.10 breaks ceph + rocksdb' = rocksdb.overrideAttrs { + version = "7.9.2"; + src = fetchFromGitHub { + owner = "facebook"; + repo = "rocksdb"; + rev = "refs/tags/v7.9.2"; + hash = "sha256-5P7IqJ14EZzDkbjaBvbix04ceGGdlWBuVFH/5dpD5VM="; + }; + }; + + hasRadosgw = optExpat != null && optCurl != null && optLibedit != null; + + # Malloc implementation (can be jemalloc, tcmalloc or null) + malloc = if optJemalloc != null then optJemalloc else optGperftools; + + # We prefer nss over cryptopp + cryptoStr = if optNss != null && optNspr != null then "nss" else + if optCryptopp != null then "cryptopp" else "none"; + + cryptoLibsMap = { + nss = [ optNss optNspr ]; + cryptopp = [ optCryptopp ]; + none = [ ]; + }; + + getMeta = description: with lib; { + homepage = "https://ceph.io/en/"; + inherit description; + license = with licenses; [ lgpl21 gpl2 bsd3 mit publicDomain ]; + maintainers = with maintainers; [ adev ak johanot krav ]; + platforms = [ "x86_64-linux" "aarch64-linux" ]; + }; + + ceph-common = with python.pkgs; buildPythonPackage { + pname = "ceph-common"; + inherit src version; + + sourceRoot = "ceph-${version}/src/python-common"; + + propagatedBuildInputs = [ + pyyaml + ]; + + nativeCheckInputs = [ + pytestCheckHook + ]; + + 
disabledTests = [ + # requires network access + "test_valid_addr" + ]; + + meta = getMeta "Ceph common module for code shared by manager modules"; + }; + + # Watch out for python <> boost compatibility + python = python310.override { + packageOverrides = self: super: { + sqlalchemy = super.sqlalchemy.overridePythonAttrs rec { + version = "1.4.46"; + src = fetchPypi { + pname = "SQLAlchemy"; + inherit version; + hash = "sha256-aRO4JH2KKS74MVFipRkx4rQM6RaB8bbxj2lwRSAMSjA="; + }; + disabledTestPaths = [ + "test/aaa_profiling" + "test/ext/mypy" + ]; + }; + }; + }; + + boost = boost179.override { + enablePython = true; + inherit python; + }; + + # TODO: split this off in build and runtime environment + ceph-python-env = python.withPackages (ps: with ps; [ + ceph-common + + # build time + cython + + # debian/control + bcrypt + cherrypy + influxdb + jinja2 + kubernetes + natsort + numpy + pecan + prettytable + pyjwt + pyopenssl + python-dateutil + pyyaml + requests + routes + scikit-learn + scipy + setuptools + sphinx + virtualenv + werkzeug + + # src/pybind/mgr/requirements-required.txt + cryptography + jsonpatch + + # src/tools/cephfs/shell/setup.py + cmd2 + colorama + ]); + inherit (ceph-python-env.python) sitePackages; + + version = "18.2.0"; + src = fetchurl { + url = "https://download.ceph.com/tarballs/ceph-${version}.tar.gz"; + hash = "sha256:0k9nl6xi5brva51rr14m7ig27mmmd7vrpchcmqc40q3c2khn6ns9"; + }; +in rec { + ceph = stdenv.mkDerivation { + pname = "ceph"; + inherit src version; + + nativeBuildInputs = [ + cmake + fmt + git + makeWrapper + nasm + pkg-config + python + python.pkgs.python # for the toPythonPath function + python.pkgs.wrapPython + which + (ensureNewerSourcesHook { year = "1980"; }) + # for building docs/man-pages presumably + doxygen + graphviz + ]; + + enableParallelBuilding = true; + + buildInputs = cryptoLibsMap.${cryptoStr} ++ [ + arrow-cpp + babeltrace + boost + bzip2 + ceph-python-env + cryptsetup + cunit + gperf + gtest + icu + libcap + 
libnl + libxml2 + lttng-ust + lua + lz4 + malloc + oath-toolkit + openldap + optLibatomic_ops + optLibs3 + optYasm + rdkafka + rocksdb' + snappy + sqlite + utf8proc + zlib + zstd + ] ++ lib.optionals stdenv.isLinux [ + keyutils + libcap_ng + liburing + libuuid + linuxHeaders + optLibaio + optLibxfs + optZfs + rabbitmq-c + rdma-core + udev + util-linux + ] ++ lib.optionals hasRadosgw [ + optCurl + optExpat + optFuse + optLibedit + ]; + + pythonPath = [ ceph-python-env "${placeholder "out"}/${ceph-python-env.sitePackages}" ]; + + preConfigure ='' + substituteInPlace src/common/module.c --replace "/sbin/modinfo" "modinfo" + substituteInPlace src/common/module.c --replace "/sbin/modprobe" "modprobe" + substituteInPlace src/common/module.c --replace "/bin/grep" "grep" + + # install target needs to be in PYTHONPATH for "*.pth support" check to succeed + # set PYTHONPATH, so the build system doesn't silently skip installing ceph-volume and others + export PYTHONPATH=${ceph-python-env}/${sitePackages}:$lib/${sitePackages}:$out/${sitePackages} + patchShebangs src/ + ''; + + cmakeFlags = [ + "-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib" + + "-DWITH_CEPHFS_SHELL:BOOL=ON" + "-DWITH_SYSTEMD:BOOL=OFF" + # `WITH_JAEGER` requires `thrift` as a depenedncy (fine), but the build fails with: + # CMake Error at src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-Release.cmake:49 (message): + # Command failed: 2 + # + # 'make' 'opentelemetry_trace' 'opentelemetry_exporter_jaeger_trace' + # + # See also + # + # /build/ceph-18.2.0/build/src/opentelemetry-cpp/src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-*.log + # and that file contains: + # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc: In member function 'virtual void opentelemetry::v1::exporter::jaeger::TUDPTransport::close()': + # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc:71:7: error: '::close' has not been declared; did you 
mean 'pclose'? + # 71 | ::THRIFT_CLOSESOCKET(socket_); + # | ^~~~~~~~~~~~~~~~~~ + # Looks like `close()` is somehow not included. + # But the relevant code is already removed in `open-telemetry` 1.10: https://github.com/open-telemetry/opentelemetry-cpp/pull/2031 + # So it's proably not worth trying to fix that for this Ceph version, + # and instead just disable Ceph's Jaeger support. + "-DWITH_JAEGER:BOOL=OFF" + "-DWITH_TESTS:BOOL=OFF" + + # Use our own libraries, where possible + "-DWITH_SYSTEM_ARROW:BOOL=ON" # Only used if other options enable Arrow support. + "-DWITH_SYSTEM_BOOST:BOOL=ON" + "-DWITH_SYSTEM_GTEST:BOOL=ON" + "-DWITH_SYSTEM_ROCKSDB:BOOL=ON" + "-DWITH_SYSTEM_UTF8PROC:BOOL=ON" + "-DWITH_SYSTEM_ZSTD:BOOL=ON" + + # TODO breaks with sandbox, tries to download stuff with npm + "-DWITH_MGR_DASHBOARD_FRONTEND:BOOL=OFF" + # WITH_XFS has been set default ON from Ceph 16, keeping it optional in nixpkgs for now + ''-DWITH_XFS=${if optLibxfs != null then "ON" else "OFF"}'' + ] ++ lib.optional stdenv.isLinux "-DWITH_SYSTEM_LIBURING=ON"; + + postFixup = '' + wrapPythonPrograms + wrapProgram $out/bin/ceph-mgr --prefix PYTHONPATH ":" "$(toPythonPath ${placeholder "out"}):$(toPythonPath ${ceph-python-env})" + + # Test that ceph-volume exists since the build system has a tendency to + # silently drop it with misconfigurations. + test -f $out/bin/ceph-volume + ''; + + outputs = [ "out" "lib" "dev" "doc" "man" ]; + + doCheck = false; # uses pip to install things from the internet + + # Takes 7+h to build with 2 cores. 
+ requiredSystemFeatures = [ "big-parallel" ]; + + meta = getMeta "Distributed storage system"; + + passthru = { + inherit version; + tests = { + inherit (nixosTests) + ceph-multi-node + ceph-single-node + ceph-single-node-bluestore; + }; + }; + }; + + ceph-client = runCommand "ceph-client-${version}" { + meta = getMeta "Tools needed to mount Ceph's RADOS Block Devices/Cephfs"; + } '' + mkdir -p $out/{bin,etc,${sitePackages},share/bash-completion/completions} + cp -r ${ceph}/bin/{ceph,.ceph-wrapped,rados,rbd,rbdmap} $out/bin + cp -r ${ceph}/bin/ceph-{authtool,conf,dencoder,rbdnamer,syn} $out/bin + cp -r ${ceph}/bin/rbd-replay* $out/bin + cp -r ${ceph}/sbin/mount.ceph $out/bin + cp -r ${ceph}/sbin/mount.fuse.ceph $out/bin + ln -s bin $out/sbin + cp -r ${ceph}/${sitePackages}/* $out/${sitePackages} + cp -r ${ceph}/etc/bash_completion.d $out/share/bash-completion/completions + # wrapPythonPrograms modifies .ceph-wrapped, so lets just update its paths + substituteInPlace $out/bin/ceph --replace ${ceph} $out + substituteInPlace $out/bin/.ceph-wrapped --replace ${ceph} $out + ''; +} diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index b86d2d9..03208c5 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -25,4 +25,11 @@ final: prev: ]; }); }); + + # Update ceph to 18.2.0 until it lands in nixpkgs, see: + # https://github.com/NixOS/nixpkgs/pull/247849 + inherit (prev.callPackage ./ceph.nix { + lua = prev.lua5_4; + fmt = prev.fmt_8; + }) ceph ceph-client; } -- 2.49.0 From a260a1bc1b14b26dd7ca3b021aa6e8030d4132c4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 28 Aug 2023 17:58:08 +0200 Subject: [PATCH 142/472] Switch ceph logs to journal --- m/bay/configuration.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 847e162..41d0a56 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -36,6 +36,14 @@ monInitialMembers = "bay"; clusterNetwork = "10.0.40.40/24"; # Use Ethernet 
only }; + extraConfig = { + # Only log to stderr so it appears in the journal + "log_file" = "/dev/null"; + "mon_cluster_log_file" = "/dev/null"; + "log_to_stderr" = "true"; + "err_to_stderr" = "true"; + "log_to_file" = "false"; + }; mds = { enable = true; daemons = [ "mds0" "mds1" ]; -- 2.49.0 From 4fa074f89340f27105ced2e1b70c0c5128a39a63 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 28 Aug 2023 17:58:21 +0200 Subject: [PATCH 143/472] Add ceph tools in hut too --- m/hut/ceph.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/ceph.nix b/m/hut/ceph.nix index 722466f..59920e8 100644 --- a/m/hut/ceph.nix +++ b/m/hut/ceph.nix @@ -1,7 +1,7 @@ { config, pkgs, ... }: { - environment.systemPackages = [ pkgs.ceph-client ]; + environment.systemPackages = [ pkgs.ceph pkgs.ceph-client ]; # We need the ceph module loaded as the mount.ceph binary fails to run the # modprobe command. -- 2.49.0 From 86eacdd3e519abd8b8b2230aaee21591e9d0dee8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 11:27:50 +0200 Subject: [PATCH 144/472] Add fio tool --- m/hut/ceph.nix | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/m/hut/ceph.nix b/m/hut/ceph.nix index 59920e8..3bebe11 100644 --- a/m/hut/ceph.nix +++ b/m/hut/ceph.nix @@ -1,7 +1,11 @@ { config, pkgs, ... }: { - environment.systemPackages = [ pkgs.ceph pkgs.ceph-client ]; + environment.systemPackages = with pkgs; [ + ceph + ceph-client + fio # For benchmarks + ]; # We need the ceph module loaded as the mount.ceph binary fails to run the # modprobe command. 
-- 2.49.0 From 2b7823788cc8297817fefb093d07355eee2aec4a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 11:53:32 +0200 Subject: [PATCH 145/472] Add monitoring in the bay node --- m/bay/configuration.nix | 1 + m/bay/monitoring.nix | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 m/bay/monitoring.nix diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 41d0a56..6aff18e 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + ./monitoring.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/bay/monitoring.nix b/m/bay/monitoring.nix new file mode 100644 index 0000000..0ef9209 --- /dev/null +++ b/m/bay/monitoring.nix @@ -0,0 +1,25 @@ +{ config, lib, ... }: + +{ + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only"; + + # Required to allow the smartctl exporter to read the nvme0 character device, + # see the commit message on: + # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80 + services.udev.extraRules = '' + SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk" + ''; + + services.prometheus = { + exporters = { + node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + }; + smartctl.enable = true; + }; + }; +} -- 2.49.0 From 1266c8f04e82899f376c269c711ebcf0b5ce5354 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 11:58:00 +0200 Subject: [PATCH 146/472] Scrape metrics from bay --- m/hut/monitoring.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 4bc3d24..65e68cc 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -67,7 +67,15 @@ "127.0.0.1:9323" "127.0.0.1:9252" 
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" + ]; + }]; + } + { + job_name = "bay"; + static_configs = [{ + targets = [ "10.0.40.40:9283" # Ceph + "10.0.40.40:9002" # Node exporter ]; }]; } -- 2.49.0 From db6bb90af8fc5af1d6bc62017ebb1122a52d9d85 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 12:29:41 +0200 Subject: [PATCH 147/472] Also enable monitoring in lake2 --- m/bay/configuration.nix | 2 +- m/{bay => common}/monitoring.nix | 0 m/lake2/configuration.nix | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) rename m/{bay => common}/monitoring.nix (100%) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 6aff18e..5e2b342 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -3,7 +3,7 @@ { imports = [ ../common/main.nix - ./monitoring.nix + ../common/monitoring.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/bay/monitoring.nix b/m/common/monitoring.nix similarity index 100% rename from m/bay/monitoring.nix rename to m/common/monitoring.nix diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index c8225db..4b05147 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + ../common/monitoring.nix ]; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; -- 2.49.0 From e1d406023dcd143771e02629bd8b39473bd451c1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 12:33:26 +0200 Subject: [PATCH 148/472] Scrape lake2 too --- m/hut/monitoring.nix | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 65e68cc..8a8646a 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -71,11 +71,12 @@ }]; } { - job_name = "bay"; + job_name = "ceph"; static_configs = [{ targets = [ - "10.0.40.40:9283" # Ceph + "10.0.40.40:9283" # Ceph statistics "10.0.40.40:9002" # Node exporter + 
"10.0.40.42:9002" # Node exporter ]; }]; } -- 2.49.0 From aad67b9d99777e1b3038e7c1aedf5b09ed5da3b9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 18:47:25 +0200 Subject: [PATCH 149/472] Enable all osd on boot in lake2 --- m/lake2/configuration.nix | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 4b05147..58bfef8 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -50,4 +50,24 @@ prefixLength = 24; } ]; }; + + # Missing service for volumes, see: + # https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/ + systemd.services.ceph-volume = { + enable = true; + description = "Ceph Volume activation"; + unitConfig = { + Type = "oneshot"; + After = "local-fs.target"; + Wants = "local-fs.target"; + }; + path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ]; + serviceConfig = { + KillMode = "none"; + Environment = "CEPH_VOLUME_TIMEOUT=10000"; + ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'"; + TimeoutSec = "0"; + }; + wantedBy = [ "multi-user.target" ]; + }; } -- 2.49.0 From 9d93760e6f5bf4a8762d3cd74f5efe91404a0478 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 29 Aug 2023 22:26:12 +0200 Subject: [PATCH 150/472] Enable watchdog --- m/common/main.nix | 1 + m/common/watchdog.nix | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 m/common/watchdog.nix diff --git a/m/common/main.nix b/m/common/main.nix index 5d77ddc..f9141aa 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -10,6 +10,7 @@ ./slurm.nix ./ssh.nix ./users.nix + ./watchdog.nix ]; nixpkgs.overlays = [ diff --git a/m/common/watchdog.nix b/m/common/watchdog.nix new file mode 100644 index 0000000..d4d297d --- /dev/null +++ b/m/common/watchdog.nix @@ -0,0 +1,9 @@ +{ ... 
}: + +{ + # The boards have a BMC watchdog controlled by IPMI + boot.kernelModules = [ "ipmi_watchdog" ]; + + # Enable systemd watchdog with 30 s interval + systemd.watchdog.runtimeTime = "30s"; +} -- 2.49.0 From acb91695ac2158b2b0b966ff7a8df5b3c37ad24c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 31 Aug 2023 17:22:36 +0200 Subject: [PATCH 151/472] Enable binary emulation for other architectures --- m/hut/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index b4ba36f..1be9dc3 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -17,6 +17,8 @@ agenix.packages.x86_64-linux.default ]; + boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; + # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; -- 2.49.0 From 2c52ef9ff0df37ba3a29ad1a105d224691f8010d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sat, 2 Sep 2023 23:37:11 +0200 Subject: [PATCH 152/472] Store nixos config in /etc/nixos/config.rev --- m/common/main.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/main.nix b/m/common/main.nix index f9141aa..25a209c 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -33,6 +33,9 @@ then theFlake.rev else throw ("Refusing to build from a dirty Git tree!"); + # Save the commit of the config in /etc/nixos/config.rev + environment.etc."nixos/config.rev".text = system.configurationRevision; + environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree -- 2.49.0 From a2c5fe1f5e357bb3a49a10fd84b8113a10790040 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sat, 2 Sep 2023 23:37:59 +0200 Subject: [PATCH 153/472] Configure bscpkgs.nixpkgs to follow nixpkgs --- flake.lock | 13 +++++++++---- flake.nix 
| 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/flake.lock b/flake.lock index 1a3da19..44b83b7 100644 --- a/flake.lock +++ b/flake.lock @@ -23,12 +23,17 @@ } }, "bscpkgs": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, "locked": { - "lastModified": 1690560045, - "narHash": "sha256-39ZP+FIzlWoN3c43hReBYpStg4RLYw/z7TdxCQmOvTM=", + "lastModified": 1693479395, + "narHash": "sha256-/C0ZEafHZlhD0m145P5crGPcD7Ck9GGSTbiqbiAlgUo=", "ref": "refs/heads/master", - "rev": "b4a20d7c3af854b39682484adfd1c7979319f439", - "revCount": 841, + "rev": "18d64c352c10f9ce74aabddeba5a5db02b74ec27", + "revCount": 845, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, diff --git a/flake.nix b/flake.nix index 7bd69c0..10be75a 100644 --- a/flake.nix +++ b/flake.nix @@ -4,6 +4,7 @@ agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; + bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; }; outputs = { self, nixpkgs, agenix, bscpkgs, ... }: -- 2.49.0 From a242ddd39ce7a6ec6292debf1f1e8fb6e7bc37ec Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sat, 2 Sep 2023 23:49:41 +0200 Subject: [PATCH 154/472] Keep a log over time with the config commits --- m/common/main.nix | 14 ++++++-------- m/common/rev.nix | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 m/common/rev.nix diff --git a/m/common/main.nix b/m/common/main.nix index 25a209c..3792135 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -11,6 +11,7 @@ ./ssh.nix ./users.nix ./watchdog.nix + ./rev.nix ]; nixpkgs.overlays = [ @@ -18,6 +19,11 @@ (import ../../pkgs/overlay.nix) ]; + system.configurationRevision = + if theFlake ? 
rev + then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); + nix.nixPath = [ "nixpkgs=${nixpkgs}" "bscpkgs=${bscpkgs}" @@ -28,14 +34,6 @@ nix.registry.bscpkgs.flake = bscpkgs; nix.registry.jungle.flake = theFlake; - system.configurationRevision = - if theFlake ? rev - then theFlake.rev - else throw ("Refusing to build from a dirty Git tree!"); - - # Save the commit of the config in /etc/nixos/config.rev - environment.etc."nixos/config.rev".text = system.configurationRevision; - environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree diff --git a/m/common/rev.nix b/m/common/rev.nix new file mode 100644 index 0000000..80d019b --- /dev/null +++ b/m/common/rev.nix @@ -0,0 +1,18 @@ +{ theFlake, ... }: + +let + rev = if theFlake ? rev then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); +in { + # Save the commit of the config in /etc/configrev + environment.etc.configrev.text = rev + "\n"; + + # Keep a log with the config over time + system.activationScripts.configRevLog.text = '' + BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown) + CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown) + NEXT=${rev} + DATENOW=$(date --iso-8601=seconds) + echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log + ''; +} -- 2.49.0 From d8d6d6d42165a904ea454c6487aba5cdae45ae8a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sun, 3 Sep 2023 11:51:53 +0200 Subject: [PATCH 155/472] Enable zsh and fix key bindings --- m/common/main.nix | 4 +-- m/common/zsh.nix | 89 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 m/common/zsh.nix diff --git a/m/common/main.nix b/m/common/main.nix index 3792135..f664ef8 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -12,6 +12,7 
@@ ./users.nix ./watchdog.nix ./rev.nix + ./zsh.nix ]; nixpkgs.overlays = [ @@ -73,9 +74,6 @@ nix.gc.dates = "weekly"; nix.gc.options = "--delete-older-than 30d"; - programs.zsh.enable = true; - programs.zsh.histSize = 100000; - programs.bash.promptInit = '' PS1="\h\\$ " ''; diff --git a/m/common/zsh.nix b/m/common/zsh.nix new file mode 100644 index 0000000..c437a49 --- /dev/null +++ b/m/common/zsh.nix @@ -0,0 +1,89 @@ +{ pkgs, ... }: + +{ + environment.systemPackages = with pkgs; [ + direnv + zsh-completions + nix-zsh-completions + ]; + + programs.zsh = { + enable = true; + histSize = 1000000; + + shellInit = '' + # Disable new user prompt + if [ ! -e ~/.zshrc ]; then + touch ~/.zshrc + fi + ''; + + promptInit = '' + # Note that to manually override this in ~/.zshrc you should run `prompt off` + # before setting your PS1 and etc. Otherwise this will likely to interact with + # your ~/.zshrc configuration in unexpected ways as the default prompt sets + # a lot of different prompt variables. 
+ autoload -U promptinit && promptinit && prompt default && setopt prompt_sp + ''; + + # Taken from Ulli Kehrle config: + # https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205 + interactiveShellInit = '' + source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh" + + # dircolors doesn't support alacritty: + # https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html + export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.
opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:'; + + # From Arch Linux and GRML + bindkey "^R" history-incremental-pattern-search-backward + bindkey "^S" history-incremental-pattern-search-forward + + # Auto rehash for new binaries + zstyle ':completion:*' rehash true + # show a nice menu with the matches + zstyle ':completion:*' menu yes select + + bindkey '^[OA' history-substring-search-up # Up + bindkey '^[[A' history-substring-search-up # Up + + bindkey '^[OB' history-substring-search-down # Down + bindkey '^[[B' history-substring-search-down # Down + + bindkey '\e[1~' beginning-of-line # Home + bindkey '\e[7~' beginning-of-line # Home + bindkey '\e[H' beginning-of-line # Home + bindkey '\eOH' beginning-of-line # Home + + bindkey '\e[4~' end-of-line # End + bindkey '\e[8~' end-of-line # End + bindkey '\e[F ' end-of-line # End + bindkey '\eOF' end-of-line # End + + bindkey '^?' 
backward-delete-char # Backspace + bindkey '\e[3~' delete-char # Del + # bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del + bindkey '\e[2~' overwrite-mode # Ins + + bindkey '^H' backward-kill-word # C-Backspace + + bindkey '5~' kill-word # C-Del + bindkey '^[[3;5~' kill-word # C-Del + bindkey '^[[3^' kill-word # C-Del + + bindkey "^[[1;5H" backward-kill-line # C-Home + bindkey "^[[7^" backward-kill-line # C-Home + + bindkey "^[[1;5F" kill-line # C-End + bindkey "^[[8^" kill-line # C-End + + bindkey '^[[1;5C' forward-word # C-Right + bindkey '^[0c' forward-word # C-Right + bindkey '^[[5C' forward-word # C-Right + + bindkey '^[[1;5D' backward-word # C-Left + bindkey '^[0d' backward-word # C-Left + bindkey '^[[5D' backward-word # C-Left + ''; + }; +} -- 2.49.0 From 13807c5e8fe30c7cfcdd1b7073af5146aba9214d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sun, 3 Sep 2023 16:46:27 +0200 Subject: [PATCH 156/472] Set zsh shell for rarias --- m/common/users.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/m/common/users.nix b/m/common/users.nix index 3399ed3..d333c20 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -1,4 +1,4 @@ -{ ... }: +{ pkgs, ... 
}: { users = { @@ -26,6 +26,7 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" ]; + shell = pkgs.zsh; }; arocanon = { -- 2.49.0 From 1e466d07df43b6401e12616a4e9a639146101d5a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sun, 3 Sep 2023 16:57:53 +0200 Subject: [PATCH 157/472] Set zsh inc_append_history option --- m/common/zsh.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/zsh.nix b/m/common/zsh.nix index c437a49..666e34b 100644 --- a/m/common/zsh.nix +++ b/m/common/zsh.nix @@ -31,6 +31,9 @@ interactiveShellInit = '' source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh" + # Save history immediately, but only load it when the shell starts + setopt inc_append_history + # dircolors doesn't support alacritty: # https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html export 
LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:'; -- 2.49.0 From 900de39e2fb6181c143167054c8f6ed3acf7712e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 16:00:01 +0200 Subject: [PATCH 158/472] Add anavarro user --- m/common/users.nix | 12 ++++++++++++ 1 
file changed, 12 insertions(+) diff --git a/m/common/users.nix b/m/common/users.nix index d333c20..8451196 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -54,6 +54,18 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" ]; }; + + anavarro = { + uid = 1037; + isNormalUser = true; + home = "/home/Computational/anavarro"; + description = "Antoni Navarro"; + group = "Computational"; + hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" + ]; + }; }; groups = { -- 2.49.0 From 0a5f9b55f5ff7b6064cedaf73aeee890b8863d38 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 21:36:31 +0200 Subject: [PATCH 159/472] Reorganize secrets and ssh keys The agenix tools needs to read the secrets from a standalone file, but we also need the same information for the SSH keys. 
--- keys.nix | 29 +++++++++++++++++++++++++++++ m/common/ssh.nix | 14 ++++++-------- m/hut/ceph.nix | 4 ++-- m/hut/gitlab-runner.nix | 10 +++++----- m/hut/secrets/ceph-user.age | 11 ----------- m/hut/secrets/nosv-token.age | Bin 541 -> 0 bytes m/hut/secrets/ovni-token.age | Bin 610 -> 0 bytes secrets/ceph-user.age | 21 +++++++++++++++++++++ secrets/nosv-token.age | 11 +++++++++++ secrets/ovni-token.age | Bin 0 -> 553 bytes secrets/secrets.nix | 13 +++++++++++++ 11 files changed, 87 insertions(+), 26 deletions(-) create mode 100644 keys.nix delete mode 100644 m/hut/secrets/ceph-user.age delete mode 100644 m/hut/secrets/nosv-token.age delete mode 100644 m/hut/secrets/ovni-token.age create mode 100644 secrets/ceph-user.age create mode 100644 secrets/nosv-token.age create mode 100644 secrets/ovni-token.age create mode 100644 secrets/secrets.nix diff --git a/keys.nix b/keys.nix new file mode 100644 index 0000000..681fcbc --- /dev/null +++ b/keys.nix @@ -0,0 +1,29 @@ +# As agenix needs to parse the secrets from a standalone .nix file, we describe +# here all the public keys +rec { + hosts = { + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; + owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; + owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; + eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; + koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; + bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; + lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + }; + + hostGroup = with hosts; rec { + compute = [ owl1 owl2 ]; + playground = [ eudy koro ]; + storage = [ bay lake2 ]; + monitor = [ hut ]; + + system = storage ++ monitor; + safe = system ++ compute; + all 
= safe ++ playground; + }; + + admins = { + rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; + root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; + }; +} diff --git a/m/common/ssh.nix b/m/common/ssh.nix index 2d805bf..b8cb5c1 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -1,5 +1,9 @@ -{ ... }: +{ lib, ... }: +let + keys = import ../../keys.nix; + hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts; +in { # Enable the OpenSSH daemon. services.openssh.enable = true; @@ -11,13 +15,7 @@ ProxyCommand nc -X connect -x localhost:23080 %h %p ''; - programs.ssh.knownHosts = { - "hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; - "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; - "owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK"; - "eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG"; - "koro".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67"; - + programs.ssh.knownHosts = hostsKeys // { "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; }; diff --git a/m/hut/ceph.nix b/m/hut/ceph.nix index 3bebe11..ebbb885 100644 --- a/m/hut/ceph.nix +++ b/m/hut/ceph.nix @@ -11,14 +11,14 @@ # modprobe command. 
boot.kernelModules = [ "ceph" ]; - age.secrets."secrets/ceph-user".file = ./secrets/ceph-user.age; + age.secrets.cephUser.file = ../../secrets/ceph-user.age; fileSystems."/ceph" = { fsType = "ceph"; device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; options = [ "mon_addr=10.0.40.40" - "secretfile=${config.age.secrets."secrets/ceph-user".path}" + "secretfile=${config.age.secrets.cephUser.path}" ]; }; } diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 6255005..d640de9 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -1,15 +1,15 @@ { pkgs, lib, config, ... }: { - age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age; - age.secrets."secrets/nosv-token".file = ./secrets/nosv-token.age; + age.secrets.ovniToken.file = ../../secrets/ovni-token.age; + age.secrets.nosvToken.file = ../../secrets/nosv-token.age; services.gitlab-runner = { enable = true; settings.concurrent = 5; services = { ovni-shell = { - registrationConfigFile = config.age.secrets."secrets/ovni-token".path; + registrationConfigFile = config.age.secrets.ovniToken.path; executor = "shell"; tagList = [ "nix" "xeon" ]; environmentVariables = { @@ -17,7 +17,7 @@ }; }; ovni-docker = { - registrationConfigFile = config.age.secrets."secrets/ovni-token".path; + registrationConfigFile = config.age.secrets.ovniToken.path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; registrationFlags = [ "--docker-network-mode host" ]; @@ -27,7 +27,7 @@ }; }; nosv-docker = { - registrationConfigFile = config.age.secrets."secrets/nosv-token".path; + registrationConfigFile = config.age.secrets.nosvToken.path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; registrationFlags = [ diff --git a/m/hut/secrets/ceph-user.age b/m/hut/secrets/ceph-user.age deleted file mode 100644 index 735afca..0000000 --- a/m/hut/secrets/ceph-user.age +++ /dev/null @@ -1,11 +0,0 @@ -age-encryption.org/v1 --> ssh-ed25519 CAWG4Q 
35Ak+Mep9k5KnDLF1ywDbMD4l4mRFg6D0et19tqXxAw -Wgr+CX4rzrPmUszSidtLAVSvgD80F2dqtd92hGZIFwo --> ssh-ed25519 MSF3dg OVFvpkAyWTowtxsafstX31H/hJpNZmnOCbvqMIN0+AQ -VxjRcQmp+BadEh2y0PB96EeizIl3tTQpVu0CWHmsc1s --> ssh-ed25519 HY2yRg MJSQIpre9m0XnojgXuKQ/+hVBZNrZNGZqplwhqicpjI -CLkE52iqpoqSnbzisNjQgxTfNqKeaRl5ntcw1d+ZDyQ --> m$8`De%~-grease '85p}`by -52zMpprONcawWDDtzHdWNwFoYXErPUnVjhSONbUBpDlqAmJmD1LcAnsU ---- 0vZOPyXQIMMGTwgFfvm8Sn8O7vjrsjGUEy5m/BASCyc -|)*_DUS`r sN[֌^e+A 1G.#mW 5 ( \ No newline at end of file diff --git a/m/hut/secrets/nosv-token.age b/m/hut/secrets/nosv-token.age deleted file mode 100644 index 4b495a944de69fccbfdde7394b7f63f80e53e06e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 541 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4b_{nn2~==1G&e6y zb4@Ssj7)Y9^7ab}HY!NfH_8pN3^p^!&JL~e4owX3wlFS>aO6t%539;9&2|p8v~Z0m zO>*@v%kkGw^YKh8F3Kn=_I4^Rv_C>eNH`vWMC0!xIKP=bW&>%duIN3Zs zDj=xX#n-gNF-^O`xIDeWIoB*Wvb5YVzbGg*%Yv(Y)P+(>70)BGy;O4Gt{lZ=v}Z}B-g%B? 
zp_%%T#wx4dwMeL zTqo#c`6z$Cxb@c-&Es$9Y*u-8PJOHIZ29^`mm}{xW<^hG>8*NGbZNJ2$pU{_zts4S zd{ZrDYx&Hh%$HZ>yxjQJn{~(9bsNnm9asA!{H@{T$E!M99=4pR;@kAlKJm+}94!yU FZvZP`#906U diff --git a/m/hut/secrets/ovni-token.age b/m/hut/secrets/ovni-token.age deleted file mode 100644 index bb850ef38e40d426b50aba25ae6993fe521b4d74..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 610 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4b_{nn2~==3w9Ii# z@d+#~E=zF_D9p(7w2aIPi83%t&+<=mE=(>nH}JJ|Pf9GywcskP)Q_|%O*hvL$jtC| zDM<26iwG$!4loP~_HZr?ORCBWa&-&MDXa)G$Va!$H`vWMC0)VL%*Q<-D=5Us$Hmvj z)G5)&s5B!q%`wt7z%m6&$uW%!_d+& z(9Ju<$SJejFE!sR$RpSwG0)PX$OPRsk4U4+pmYU&$CS*xOrvsx5P#z=%g7v8*MO3M z91|BulOXq!sHliapM2Bu%ygIHkYui;+=BGN;-b8a0t<_sfiZOu3Rp^1&X|; zZ&+IRvM{JE)H3 ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU +J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs +-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE +4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk +-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4 +Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs +-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg +LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo +-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0 +ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8 +-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0 +T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E +-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc +MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok +-> ~8m;7f-grease +lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/ +G/X4EA +--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA +N $[HQ +d'7Ͳ)x9yEM7^[M+&$8tMв \ No newline at end of file diff --git a/secrets/nosv-token.age b/secrets/nosv-token.age new file mode 100644 index 0000000..31a354b --- /dev/null +++ b/secrets/nosv-token.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg 
hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE +avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro +-> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA +FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70 +-> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o +BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8 +-> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t +oLG+0S1aGfO/ohCfgGmhDhwwLi4H +--- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI +Wo d;C._(u G#vgɝyYl9ϵ.0x޽N./tBbK:Q\T_txm_Jޞ- \ No newline at end of file diff --git a/secrets/ovni-token.age b/secrets/ovni-token.age new file mode 100644 index 0000000000000000000000000000000000000000..4378c388dd465e6d39681bbca5354136454855f8 GIT binary patch literal 553 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCUlh%~AUN>_-A$`23A zDo)GIF7wp(Ov{eQiSY0;)Hm`AOESz(bu&-SbPv(bND0o*HQ*{q)s8eRb&X2$Fo|?c zOfGXaaC0xrG&Ts+&vSRlEh&#IiS(=THpz%OJ4OHKZauETk$V)xh7pJUF*9D4nagq`0)SI48Tr$=@Kr*)S?BGtkA% z-^A3zJlM(0Gbzh8t;{*xG|M-!(i7b_-(WZ6lyn7c$BKX~R|``M!<@);L%*<~j9~Y~ zf+};bEOTv>s;sj7ilnS^cgsqra7V6`vdSQXDt$9w-@KrF7tet3GT%ZM^U9n`KOb|` z$nZ$F3geKdv^4F?T(IA~bkmDc6N^(73QbJ)6e=r>O%#+0Lj$=?qbhv3baizV!c9F5 z^dpTN!#rK{GCa(RQYw#gR=1f`Si!D3=SW$xm)!n%D5i zN@(p(vjm3cj@_0PHOy!2h%MOzk;irohVPU`daU;a=K7W}SfdCXr`TOLcc1Brj_ Q4n26Eb9KfoqnGXB00g1M?EnA( literal 0 HcmV?d00001 diff --git a/secrets/secrets.nix b/secrets/secrets.nix new file mode 100644 index 0000000..34fb177 --- /dev/null +++ b/secrets/secrets.nix @@ -0,0 +1,13 @@ +let + keys = import ../keys.nix; + adminsKeys = builtins.attrValues keys.admins; + hut = [ keys.hosts.hut ] ++ adminsKeys; + # Only expose ceph keys to safe nodes and admins + ceph = keys.hostGroup.safe ++ adminsKeys; +in +{ + "ovni-token.age".publicKeys = hut; + "nosv-token.age".publicKeys = hut; + + "ceph-user.age".publicKeys = ceph; +} -- 2.49.0 From 74ec4eb22aca1eb3a8f9a61d07ed54e2833163d7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 21:59:04 +0200 Subject: [PATCH 160/472] Move the ceph client config to an 
external module --- m/hut/configuration.nix | 2 +- m/{hut => module}/ceph.nix | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) rename m/{hut => module}/ceph.nix (93%) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 1be9dc3..c3e72ab 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -4,11 +4,11 @@ imports = [ ../common/main.nix + ../module/ceph.nix ./gitlab-runner.nix ./monitoring.nix ./nfs.nix ./slurm-daemon.nix - ./ceph.nix #./pxe.nix agenix.nixosModules.default ]; diff --git a/m/hut/ceph.nix b/m/module/ceph.nix similarity index 93% rename from m/hut/ceph.nix rename to m/module/ceph.nix index ebbb885..ff3fd22 100644 --- a/m/hut/ceph.nix +++ b/m/module/ceph.nix @@ -1,5 +1,6 @@ { config, pkgs, ... }: +# Mounts the /ceph filesystem at boot { environment.systemPackages = with pkgs; [ ceph -- 2.49.0 From d704816de9deb22c196b474f74da2e32c846a871 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 21:59:56 +0200 Subject: [PATCH 161/472] Clean owl2 configuration --- m/owl2/configuration.nix | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 8022b36..ed1aab4 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -1,15 +1,12 @@ -{ config, pkgs, modulesPath, lib, ... }: +{ config, pkgs, ... 
}: { imports = [ - #(modulesPath + "/installer/netboot/netboot-minimal.nix") ../common/main.nix ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; - #programs.ssh.forwardX11 = false; - #programs.ssh.setXAuthLocation = lib.mkForce true; networking = { hostName = "owl2"; -- 2.49.0 From e2f82a63830b8446672b2ff76a6a856a281b26ed Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 22:00:17 +0200 Subject: [PATCH 162/472] Warn about the owl2 omnipath device --- m/owl2/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index ed1aab4..8da8f7d 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -14,6 +14,7 @@ address = "10.0.40.2"; prefixLength = 24; } ]; + # Watch out! The OmniPath device is not in the same place here: interfaces.ibp129s0.ipv4.addresses = [ { address = "10.0.42.2"; prefixLength = 24; -- 2.49.0 From 208dcb7dde3025d4edaf1c40dd5fe70b2ce1e362 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 22:00:36 +0200 Subject: [PATCH 163/472] Mount /ceph in owl1 and owl2 --- m/owl1/configuration.nix | 5 ++++- m/owl2/configuration.nix | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index a14ab21..8255898 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -1,7 +1,10 @@ { config, pkgs, ... 
}: { - imports = [ ../common/main.nix ]; + imports = [ + ../common/main.nix + ../module/ceph.nix + ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 8da8f7d..0af3c40 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + ../module/ceph.nix ]; # Select the this using the ID to avoid mismatches -- 2.49.0 From 2c8c90e6e49ff7625040c417deda7908eefc17ad Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 22:04:32 +0200 Subject: [PATCH 164/472] Remove old secrets --- m/hut/secrets.nix | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 m/hut/secrets.nix diff --git a/m/hut/secrets.nix b/m/hut/secrets.nix deleted file mode 100644 index d22add6..0000000 --- a/m/hut/secrets.nix +++ /dev/null @@ -1,11 +0,0 @@ -let - rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; - root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb"; - hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; - default = [ rarias root hut ]; -in -{ - "secrets/ovni-token.age".publicKeys = default; - "secrets/nosv-token.age".publicKeys = default; - "secrets/ceph-user.age".publicKeys = default; -} -- 2.49.0 From 2042d58b72bae349c39f6015a97f4eff6f801234 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 22:06:20 +0200 Subject: [PATCH 165/472] Add agenix module to ceph --- m/module/ceph.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/m/module/ceph.nix b/m/module/ceph.nix index ff3fd22..ac7b3d3 100644 --- a/m/module/ceph.nix +++ b/m/module/ceph.nix @@ -1,7 +1,9 @@ -{ config, pkgs, ... }: +{ config, pkgs, agenix, ... 
}: # Mounts the /ceph filesystem at boot { + imports = [ agenix.nixosModules.default ]; + environment.systemPackages = with pkgs; [ ceph ceph-client -- 2.49.0 From 68f4d54dd144aa61a41375ffdc61aa3836555913 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 4 Sep 2023 22:09:40 +0200 Subject: [PATCH 166/472] Add agenix to all nodes --- m/common/agenix.nix | 9 +++++++++ m/common/main.nix | 1 + m/hut/configuration.nix | 7 +------ m/module/ceph.nix | 4 +--- 4 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 m/common/agenix.nix diff --git a/m/common/agenix.nix b/m/common/agenix.nix new file mode 100644 index 0000000..3d5bb65 --- /dev/null +++ b/m/common/agenix.nix @@ -0,0 +1,9 @@ +{ agenix, ... }: + +{ + imports = [ agenix.nixosModules.default ]; + + environment.systemPackages = [ + agenix.packages.x86_64-linux.default + ]; +} diff --git a/m/common/main.nix b/m/common/main.nix index f664ef8..a5f8e45 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -2,6 +2,7 @@ { imports = [ + ./agenix.nix ./boot.nix ./fs.nix ./hw.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index c3e72ab..fc4d2ab 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -1,4 +1,4 @@ -{ config, pkgs, agenix, ... }: +{ config, pkgs, ... }: { imports = [ @@ -10,11 +10,6 @@ ./nfs.nix ./slurm-daemon.nix #./pxe.nix - agenix.nixosModules.default - ]; - - environment.systemPackages = [ - agenix.packages.x86_64-linux.default ]; boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; diff --git a/m/module/ceph.nix b/m/module/ceph.nix index ac7b3d3..ff3fd22 100644 --- a/m/module/ceph.nix +++ b/m/module/ceph.nix @@ -1,9 +1,7 @@ -{ config, pkgs, agenix, ... }: +{ config, pkgs, ... 
}: # Mounts the /ceph filesystem at boot { - imports = [ agenix.nixosModules.default ]; - environment.systemPackages = with pkgs; [ ceph ceph-client -- 2.49.0 From 8d31c552f584e5ca43bffa523d581b990c9fe964 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 5 Sep 2023 15:03:26 +0200 Subject: [PATCH 167/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'bscpkgs': 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=18d64c352c10f9ce74aabddeba5a5db02b74ec27' (2023-08-31) → 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=ee24b910a1cb95bd222e253da43238e843816f2f' (2023-09-01) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/d680ded26da5cf104dd2735a51e88d2d8f487b4d' (2023-08-19) → 'github:NixOS/nixpkgs/e56990880811a451abd32515698c712788be5720' (2023-09-02) --- flake.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flake.lock b/flake.lock index 44b83b7..97c2222 100644 --- a/flake.lock +++ b/flake.lock @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1693479395, - "narHash": "sha256-/C0ZEafHZlhD0m145P5crGPcD7Ck9GGSTbiqbiAlgUo=", + "lastModified": 1693579892, + "narHash": "sha256-dcgnHtutDrKmWW8mcr1jb4JoM7kbH9Q8QSW6aA6i2dI=", "ref": "refs/heads/master", - "rev": "18d64c352c10f9ce74aabddeba5a5db02b74ec27", - "revCount": 845, + "rev": "ee24b910a1cb95bd222e253da43238e843816f2f", + "revCount": 848, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, @@ -87,11 +87,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1692447944, - "narHash": "sha256-fkJGNjEmTPvqBs215EQU4r9ivecV5Qge5cF/QDLVn3U=", + "lastModified": 1693663421, + "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "d680ded26da5cf104dd2735a51e88d2d8f487b4d", + "rev": "e56990880811a451abd32515698c712788be5720", "type": "github" }, 
"original": { -- 2.49.0 From eb9876aff6348d8ca85827dbcd9e4f7191f264ff Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 5 Sep 2023 16:24:27 +0200 Subject: [PATCH 168/472] Unlock ovni gitlab runners --- m/hut/gitlab-runner.nix | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index d640de9..d9fedb5 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -12,6 +12,10 @@ registrationConfigFile = config.age.secrets.ovniToken.path; executor = "shell"; tagList = [ "nix" "xeon" ]; + registrationFlags = [ + # Using space doesn't work, and causes it to misread the next flag + "--locked='false'" + ]; environmentVariables = { SHELL = "${pkgs.bash}/bin/bash"; }; @@ -20,7 +24,10 @@ registrationConfigFile = config.age.secrets.ovniToken.path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; - registrationFlags = [ "--docker-network-mode host" ]; + registrationFlags = [ + "--locked='false'" + "--docker-network-mode host" + ]; environmentVariables = { https_proxy = "http://localhost:23080"; http_proxy = "http://localhost:23080"; -- 2.49.0 From efe1b7e399b4c34ef53ddc36ce5e83ad8d96f626 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 7 Sep 2023 11:13:45 +0200 Subject: [PATCH 169/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'bscpkgs': 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=ee24b910a1cb95bd222e253da43238e843816f2f' (2023-09-01) → 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=6122fef92701701e1a0622550ac0fc5c2beb5906' (2023-09-07) --- flake.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flake.lock b/flake.lock index 97c2222..294fa56 100644 --- a/flake.lock +++ b/flake.lock @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1693579892, - "narHash": 
"sha256-dcgnHtutDrKmWW8mcr1jb4JoM7kbH9Q8QSW6aA6i2dI=", + "lastModified": 1694077645, + "narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=", "ref": "refs/heads/master", - "rev": "ee24b910a1cb95bd222e253da43238e843816f2f", - "revCount": 848, + "rev": "6122fef92701701e1a0622550ac0fc5c2beb5906", + "revCount": 860, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, -- 2.49.0 From 1a1708f16f40ae9a3fecf49be98567ff1807604b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 13:21:37 +0200 Subject: [PATCH 170/472] Add IB and IPMI node host names --- m/common/net.nix | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/m/common/net.nix b/m/common/net.nix index 09765c4..e542e5f 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -32,8 +32,8 @@ # Node Entry for node: mds01 (ID=72) 10.0.40.40 bay mds01 mds01-eth0 - 10.0.42.40 mds01-ib0 - 10.0.40.141 mds01-ipmi0 + 10.0.42.40 bay-ib mds01-ib0 + 10.0.40.141 bay-ipmi mds01-ipmi0 # Node Entry for node: oss01 (ID=73) 10.0.40.41 oss01 oss01-eth0 @@ -42,18 +42,18 @@ # Node Entry for node: oss02 (ID=74) 10.0.40.42 lake2 oss02 oss02-eth0 - 10.0.42.42 oss02-ib0 - 10.0.40.143 oss02-ipmi0 + 10.0.42.42 lake2-ib oss02-ib0 + 10.0.40.143 lake2-ipmi oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) 10.0.40.1 owl1 xeon01 xeon01-eth0 - 10.0.42.1 xeon01-ib0 - 10.0.40.101 xeon01-ipmi0 + 10.0.42.1 owl1-ib xeon01-ib0 + 10.0.40.101 owl1-ipmi xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) 10.0.40.2 owl2 xeon02 xeon02-eth0 - 10.0.42.2 xeon02-ib0 - 10.0.40.102 xeon02-ipmi0 + 10.0.42.2 owl2-ib xeon02-ib0 + 10.0.40.102 owl2-ipmi xeon02-ipmi0 # Node Entry for node: xeon03 (ID=17) 10.0.40.3 xeon03 xeon03-eth0 @@ -67,8 +67,8 @@ # Node Entry for node: xeon05 (ID=19) 10.0.40.5 koro xeon05 xeon05-eth0 - 10.0.42.5 xeon05-ib0 - 10.0.40.105 xeon05-ipmi0 + 10.0.42.5 koro-ib xeon05-ib0 + 10.0.40.105 koro-ipmi xeon05-ipmi0 # Node Entry for node: xeon06 (ID=20) 
10.0.40.6 xeon06 xeon06-eth0 @@ -77,13 +77,13 @@ # Node Entry for node: xeon07 (ID=21) 10.0.40.7 hut xeon07 xeon07-eth0 - 10.0.42.7 xeon07-ib0 - 10.0.40.107 xeon07-ipmi0 + 10.0.42.7 hut-ib xeon07-ib0 + 10.0.40.107 hut-ipmi xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) 10.0.40.8 eudy xeon08 xeon08-eth0 - 10.0.42.8 xeon08-ib0 - 10.0.40.108 xeon08-ipmi0 + 10.0.42.8 eudy-ib xeon08-ib0 + 10.0.40.108 eudy-ipmi xeon08-ipmi0 ''; }; } -- 2.49.0 From 9c9c41fb57716a6e0ce08bfbcc3fb0984fea1082 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 13:31:23 +0200 Subject: [PATCH 171/472] Poweroff idle slurm nodes after 1 hour --- m/common/slurm.nix | 47 ++++++++++++++++++++++++++++++++++++++++-- m/hut/slurm-daemon.nix | 4 ---- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index b02a914..988e1d8 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -1,6 +1,33 @@ -{ lib, ... }: +{ pkgs, lib, ... }: -{ +let + suspendProgram = pkgs.writeScript "suspend.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Shutting down host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off + done + ''; + + resumeProgram = pkgs.writeScript "resume.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Starting host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on + done + ''; + +in { systemd.services.slurmd.serviceConfig = { # Kill all processes in the control group on stop/restart. 
This will kill # all the jobs running, so ensure that we only upgrade when the nodes are @@ -9,6 +36,7 @@ # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 KillMode = lib.mkForce "control-group"; }; + services.slurm = { client.enable = true; controlMachine = "hut"; @@ -18,6 +46,11 @@ "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; + partitionName = [ + "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" + "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" + ]; + # See slurm.conf(5) for more details about these options. extraConfig = '' # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but @@ -37,6 +70,16 @@ # Enable task/affinity to allow the jobs to run in a specified subset of # the resources. Use the task/cgroup plugin to enable process containment. TaskPlugin=task/affinity,task/cgroup + + # Power off unused nodes until they are requested + SuspendProgram=${suspendProgram} + SuspendTimeout=60 + ResumeProgram=${resumeProgram} + ResumeTimeout=300 + SuspendExcNodes=hut + + # Turn the nodes off after 1 hour of inactivity + SuspendTime=3600 ''; }; } diff --git a/m/hut/slurm-daemon.nix b/m/hut/slurm-daemon.nix index e6ab227..e7fab8b 100644 --- a/m/hut/slurm-daemon.nix +++ b/m/hut/slurm-daemon.nix @@ -3,9 +3,5 @@ { services.slurm = { server.enable = true; - partitionName = [ - "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" - "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" - ]; }; } -- 2.49.0 From a758eef354b1341a96a36ef7c8528aac9b050235 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 17:20:32 +0200 Subject: [PATCH 172/472] Block ssfhead from reaching our slurm daemon --- m/common/net.nix | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index e542e5f..2057143 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -23,7 +23,14 @@ allowedTCPPorts = [ 22 ]; # FIXME: For slurmd as it requests the compute nodes to 
connect to us - allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + #allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + + extraCommands = '' + # Prevent ssfhead from contacting our slurmd daemon + iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse + # But accept traffic to slurm ports from any other node in the subnet + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept + ''; }; extraHosts = '' -- 2.49.0 From f231dc81f17999ca6fbe490a5f664319f0a216bd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 17:51:37 +0200 Subject: [PATCH 173/472] Allow only some ports for srun --- m/common/net.nix | 2 ++ m/common/slurm.nix | 3 +++ 2 files changed, 5 insertions(+) diff --git a/m/common/net.nix b/m/common/net.nix index 2057143..57f15be 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -30,6 +30,8 @@ iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse # But accept traffic to slurm ports from any other node in the subnet iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept + # We also need to open the srun port range + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept ''; }; diff --git a/m/common/slurm.nix b/m/common/slurm.nix index 988e1d8..5404d0c 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -80,6 +80,9 @@ in { # Turn the nodes off after 1 hour of inactivity SuspendTime=3600 + + # Reduce port range so we can allow only this range in the firewall + SrunPortRange=60000-61000 ''; }; } -- 2.49.0 From 868f825e267a793d9b62a0d1e669f293a8eaff1b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 18:13:04 +0200 Subject: [PATCH 174/472] Make exporters listen in localhost only --- m/hut/monitoring.nix | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 8a8646a..b455c1d 100644 --- 
a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -19,6 +19,7 @@ enable = true; port = 9001; retentionTime = "1y"; + listenAddress = "127.0.0.1"; }; systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false; @@ -48,13 +49,18 @@ user = "root"; configFile = ./ipmi.yml; #extraFlags = [ "--log.level=debug" ]; + listenAddress = "127.0.0.1"; }; node = { enable = true; enabledCollectors = [ "systemd" ]; port = 9002; + listenAddress = "127.0.0.1"; + }; + smartctl = { + enable = true; + listenAddress = "127.0.0.1"; }; - smartctl.enable = true; }; scrapeConfigs = [ -- 2.49.0 From e3e6e7662dceb3ae38d72e8d2207ab5362352e08 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 18:22:48 +0200 Subject: [PATCH 175/472] Remove unused large port hole in firewall --- m/common/net.nix | 4 ---- 1 file changed, 4 deletions(-) diff --git a/m/common/net.nix b/m/common/net.nix index 57f15be..d90f8ba 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -21,10 +21,6 @@ firewall = { enable = true; allowedTCPPorts = [ 22 ]; - - # FIXME: For slurmd as it requests the compute nodes to connect to us - #allowedTCPPortRanges = [ { from=1024; to=65535; } ]; - extraCommands = '' # Prevent ssfhead from contacting our slurmd daemon iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse -- 2.49.0 From 2a0254b6841ad72a4f9a2423d9befe32b7f77653 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 8 Sep 2023 19:01:57 +0200 Subject: [PATCH 176/472] Add encrypted munge key with agenix --- m/common/slurm.nix | 13 ++++++++++++- secrets/munge-key.age | Bin 0 -> 2007 bytes secrets/secrets.nix | 5 +++-- 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 secrets/munge-key.age diff --git a/m/common/slurm.nix b/m/common/slurm.nix index 5404d0c..22ffae6 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -1,4 +1,4 @@ -{ pkgs, lib, ... }: +{ config, pkgs, lib, ... 
}: let suspendProgram = pkgs.writeScript "suspend.sh" '' @@ -85,4 +85,15 @@ in { SrunPortRange=60000-61000 ''; }; + + age.secrets.mungeKey = { + file = ../../secrets/munge-key.age; + owner = "munge"; + group = "munge"; + }; + + services.munge = { + enable = true; + password = config.age.secrets.mungeKey.path; + }; } diff --git a/secrets/munge-key.age b/secrets/munge-key.age new file mode 100644 index 0000000000000000000000000000000000000000..ead42c816eb6d3ba2973e4dcb0a62c55e6f5ac8b GIT binary patch literal 2007 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4jI^loE?3B}a5ptg zO)1Mt%yPF(HLgfXi7YoaG|F|$3v*6OEix%A%_;TGH}<#iu;4Pz%5u`q)(%cHHS_f~ zFinkg3kogw3UJExu?+CmFA2`f40g{5G0AlebwszVINdQZ*HOW-&@0m2pfoZdDJUQy zqbyt7*)zq`tT4PFJlioO-N3^=smRpGEhsnC*@Vl*qslF((mBu2#njZ;%e>Ga)X31Y z($~>LKP=HV*C#SEKc^(KtVBB^F%;c4k4U4+pmc?*Am=Ja|AMSC?~oAf!pz7-Q%@7O z;-ZME%#>0O<8XgxuhPOOWAj3vJPR(fRL}C{(!AtwgTh?joN||vyc{3*a>p!v%QW+1 z*X(2?ujJ%H)2LE?r$BVuN|H-+0|FJaGs5!=t3pEjOHAC$(+qur4cxOWOfr1EiwjJA zd|kpq6EjUxoy{wat3tU-y$a1ztBg~VJ^i!`ErN>6^z;3zqQV^Aqx>x0obrPW0z)%` zT}l#NO)zZp@-p-ZPge-@$WAw~bc}HFOp7cC^fbzg%#8F1$*IV3$#5|-axDqV4K9w- z_6`ZmNarex@G&sUFf4K`Pcz6ht}t}X&T@1&Eb}+dw{RDWl9F(yYkeGrJ_RIKrqRBqu5)oy*rR zIm9ocEITYS(=RD7#LX+IBFW1#FFngLE6U5?-Lcpy(=aHdEK3_RmG}m`8Kzg}OIE7|bctheXS~tBYHL*BV!NStaSu3(OOd%va zMnT6-!JjKHBR|I}F+0Gy+&v)DHzzyTqQunD*DE00ILN&)*(co7JtU{dBq=gAH#Ez) z+&$7MCBHnfOk3L`B{IFhEYpO`(lXFjJIFP$&@Cs&z}%BdS65ddELh*PNZU0kG|4E# z*tI+;!YDP>u{^QFHP^{JD!b6KEIHrI$1&G0H#40pO8QdjB#S=XY4ROaagQ$X-%@M} z-thOev&Wkc3+CmPU8|a>d-#Gw^afcU+4FqcSc92vx=oZ;KjwXy;kMoS>vQ;B%fsH= ztbY=cz;kXv#m0h_GE)=&-*N4^v%vGd>XxZDWW>GHuh+h?j&fo8e6M-#k2eC6Mwtiq zM6d)Ie7e7G8LzKuntxvp^XlS=)!7mkS<`>&|9a-I=)05hgZNJ_75W>Vxh~`^zW2AI zh_z;A^feFFM%~ja=i2zie%8(ptk)HOV#V3!{5#*lh@rALD#*|CiN?ZRJTeoDv?s1= zYCLOjNRL-euCL*0SKaouuFp%1OK#rxHfj66`P6Y4yJ^pMTzT6c!dadX>nK*s$r9)> z?}+fENku>QHwyiAeYe|n=iIr>_?(5>%u;M<;pX(!t2zvV2?_gq)0E|OR& zV;L8gxM;!GpxmS#t0MoFT)N1a&OLMOKe0HScD19KQ`au-N??o$-}UN({5yyIGpyaO 
zV}FXO%-E$C8Cnwdbuar(!~Ro~K7AD`;@NdQNnr99J>5XocdsqJ-Qni6(*1Gb;Jg3Z zwwQ)5F)GMjQSnXrgU%YOZ#pw9d*42rf9*=)t=GXHJcGB{ta$Ah_(RP9yj!$ky7r%G zAK2{L!|GQbN$Ylc9qL)qaNOEUnd_2h^(xkC-)(W1lXq4x^kFyO!t$c}yx-l1#R``+ zP5D(Ul4tN+`b_WMcyeb&;#To3r_MeK$lf?pBz~Sp=tY-`$$OJ79nUz?C>3FT;a`ow znMvBaJ{oE7%?r#YeAwhR=+2 z)u-Qdl~KV7j_tbJ5-)!*V|yWQ+_#3avl3+K9RJbCj_OAyZk{VA^9 zk;9<5AJ|~uH(~B<>82$s**IGG@9WOr^nGXbnT4B^&hTzY z3XWzs*u>(m`C;p)cl(|m!2IL$ot`{&JOt*>%v@F E0FV+i@Bjb+ literal 0 HcmV?d00001 diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 34fb177..9dce058 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -3,11 +3,12 @@ let adminsKeys = builtins.attrValues keys.admins; hut = [ keys.hosts.hut ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins - ceph = keys.hostGroup.safe ++ adminsKeys; + safe = keys.hostGroup.safe ++ adminsKeys; in { "ovni-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut; - "ceph-user.age".publicKeys = ceph; + "ceph-user.age".publicKeys = safe; + "munge-key.age".publicKeys = safe; } -- 2.49.0 From b120a7ca85027fb6325406e77298b60c5e176e8a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 12 Sep 2023 12:19:43 +0200 Subject: [PATCH 177/472] Serve the nix store from hut --- m/hut/configuration.nix | 1 + m/hut/nix-serve.nix | 16 ++++++++++++++++ secrets/nix-serve.age | 12 ++++++++++++ secrets/secrets.nix | 1 + 4 files changed, 30 insertions(+) create mode 100644 m/hut/nix-serve.nix create mode 100644 secrets/nix-serve.age diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index fc4d2ab..9187812 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -9,6 +9,7 @@ ./monitoring.nix ./nfs.nix ./slurm-daemon.nix + ./nix-serve.nix #./pxe.nix ]; diff --git a/m/hut/nix-serve.nix b/m/hut/nix-serve.nix new file mode 100644 index 0000000..35ccd72 --- /dev/null +++ b/m/hut/nix-serve.nix @@ -0,0 +1,16 @@ +{ config, ... 
}: + +{ + age.secrets.nixServe.file = ../../secrets/nix-serve.age; + + services.nix-serve = { + enable = true; + # Only listen locally, as we serve it via ssh + bindAddress = "127.0.0.1"; + port = 5000; + + secretKeyFile = config.age.secrets.nixServe.path; + # Public key: + # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0= + }; +} diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age new file mode 100644 index 0000000..f59c323 --- /dev/null +++ b/secrets/nix-serve.age @@ -0,0 +1,12 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg d144D+VvxhYgKtH//uD2qNuVnYX6bh74YqkyM3ZjBwU +0IeVmFAf4U8Sm0d01O6ZwJ1V2jl/mSMl4wF0MP5LrIg +-> ssh-ed25519 CAWG4Q H4nKxue/Cj/3KUF5A+/ygHMjjArwgx3SIWwXcqFtyUo +4k5NJkLUrueLYiPkr2LAwQLWmuaOIsDmV/86ravpleU +-> ssh-ed25519 MSF3dg HpgUAFHLPs4w0cdJHqTwf8lySkTeV9O9NnBf49ClDHs +foPIUUgAYe1YSDy6+aMfjN7xv9xud9fDmhRlIztHoEo +-> vLkF\<-grease +3GRT+W8gYSpjl/a6Ix9+g9UJnTpl1ZH/oucfR801vfE8y77DV2Jxz/XJwzxYxKG5 +YEhiTGMNbXw/V7E5aVSz6Bdc +--- GtiHKCZdHByq9j0BSLd544PhbEwTN138E8TFdxipeiA +G$SRATh]n8,Hs=p'+j9):)Y8I8:olZ3PMF;rY$yLٜΜUs16ǾLb \ No newline at end of file diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 9dce058..95b43ac 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -8,6 +8,7 @@ in { "ovni-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut; + "nix-serve.age".publicKeys = hut; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; -- 2.49.0 From 2646ad4b707549d7e5c5fe677d233084dab98ef6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 12 Sep 2023 16:39:45 +0200 Subject: [PATCH 178/472] Enable fstrim service --- m/common/fs.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/fs.nix b/m/common/fs.nix index 60fccef..10339d0 100644 --- a/m/common/fs.nix +++ b/m/common/fs.nix @@ -6,6 +6,9 @@ fsType = "ext4"; }; + # Trim unused blocks weekly + services.fstrim.enable = true; + swapDevices = [ { device = "/dev/disk/by-label/swap"; } ]; -- 2.49.0 From 
5a5f4672cd40a471e55dd574c6c59704403419f9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 13 Sep 2023 15:57:13 +0200 Subject: [PATCH 179/472] Monitor storage nodes via IPMI too --- m/hut/targets.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/hut/targets.yml b/m/hut/targets.yml index 2cecd66..8924735 100644 --- a/m/hut/targets.yml +++ b/m/hut/targets.yml @@ -7,5 +7,9 @@ - 10.0.40.106 - 10.0.40.107 - 10.0.40.108 + # Storage + - 10.0.40.141 + - 10.0.40.142 + - 10.0.40.143 labels: job: ipmi-lan -- 2.49.0 From 7bfd786c01c36131cd00b90fc6a9503fd1226578 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 13 Sep 2023 17:44:24 +0200 Subject: [PATCH 180/472] Update slurm to 23.02.05.1 --- pkgs/overlay.nix | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 03208c5..de16ffe 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -32,4 +32,16 @@ final: prev: lua = prev.lua5_4; fmt = prev.fmt_8; }) ceph ceph-client; + + # Update slurm to 23.02.5 to fix the firewall problem with pmix + slurm = prev.slurm.overrideAttrs (old: rec { + version = "23.02.5.1"; + src = prev.fetchFromGitHub { + owner = "SchedMD"; + repo = "slurm"; + # The release tags use - instead of . + rev = "slurm-${builtins.replaceStrings ["."] ["-"] version}"; + sha256 = "sha256-9VvZ8xySYFyBa5tZzf5WCShbEDpqE1/5t76jXX6t+bc="; + }; + }); } -- 2.49.0 From d522113cb92d60e14b46b6506bb0c5a5e143116d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Sep 2023 15:45:43 +0200 Subject: [PATCH 181/472] Open ports in firewall of compute nodes --- m/module/slurm-firewall.nix | 8 ++++++++ m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + 3 files changed, 10 insertions(+) create mode 100644 m/module/slurm-firewall.nix diff --git a/m/module/slurm-firewall.nix b/m/module/slurm-firewall.nix new file mode 100644 index 0000000..8f52022 --- /dev/null +++ b/m/module/slurm-firewall.nix @@ -0,0 +1,8 @@ +{ ... 
}: + +{ + networking.firewall = { + # Required for PMIx in SLURM, we should find a better way + allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + }; +} diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 8255898..5afda72 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/main.nix ../module/ceph.nix + ../module/slurm-firewall.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 0af3c40..9636760 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/main.nix ../module/ceph.nix + ../module/slurm-firewall.nix ]; # Select the this using the ID to avoid mismatches -- 2.49.0 From 8dbee06d1dc010c20e5df535c181112725276563 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Sep 2023 15:46:18 +0200 Subject: [PATCH 182/472] Revert "Update slurm to 23.02.05.1" This reverts commit 7bfd786c01c36131cd00b90fc6a9503fd1226578. --- pkgs/overlay.nix | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index de16ffe..03208c5 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -32,16 +32,4 @@ final: prev: lua = prev.lua5_4; fmt = prev.fmt_8; }) ceph ceph-client; - - # Update slurm to 23.02.5 to fix the firewall problem with pmix - slurm = prev.slurm.overrideAttrs (old: rec { - version = "23.02.5.1"; - src = prev.fetchFromGitHub { - owner = "SchedMD"; - repo = "slurm"; - # The release tags use - instead of . 
- rev = "slurm-${builtins.replaceStrings ["."] ["-"] version}"; - sha256 = "sha256-9VvZ8xySYFyBa5tZzf5WCShbEDpqE1/5t76jXX6t+bc="; - }; - }); } -- 2.49.0 From 599b23ef52945d63cf98e317dcb8bf87cfd7d1a9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Sep 2023 18:09:05 +0200 Subject: [PATCH 183/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'bscpkgs': 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=6122fef92701701e1a0622550ac0fc5c2beb5906' (2023-09-07) → 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=3a4062ac04be6263c64a481420d8e768c2521b80' (2023-09-14) --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 294fa56..3c654b9 100644 --- a/flake.lock +++ b/flake.lock @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1694077645, + "lastModified": 1694708510, "narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=", "ref": "refs/heads/master", - "rev": "6122fef92701701e1a0622550ac0fc5c2beb5906", - "revCount": 860, + "rev": "3a4062ac04be6263c64a481420d8e768c2521b80", + "revCount": 862, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, -- 2.49.0 From ff98ba47c427371394ed48a73b247765d5f48db9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 15 Sep 2023 09:13:24 +0200 Subject: [PATCH 184/472] Don't fetch registry flakes from the net --- m/common/main.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/main.nix b/m/common/main.nix index a5f8e45..5e7aa34 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -32,6 +32,9 @@ "jungle=${theFlake.outPath}" ]; + nix.settings.flake-registry = + pkgs.writeText "global-registry.json" ''{"flakes":[],"version":2}''; + nix.registry.nixpkgs.flake = nixpkgs; nix.registry.bscpkgs.flake = bscpkgs; nix.registry.jungle.flake = theFlake; -- 2.49.0 
From 1ae5d9e25ee1e01bef7d674aa75ef39ee74c0682 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Sep 2023 18:28:09 +0200 Subject: [PATCH 185/472] Use hut packages as the default package set Allows the user to directly access nixpkgs and bscpkgs from the top level as `nix build jungle#htop` and `nix build jungle#bsc.ovni`. --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index 10be75a..d9fd95f 100644 --- a/flake.nix +++ b/flake.nix @@ -26,6 +26,6 @@ in lake2 = mkConf "lake2"; }; - packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; + packages.x86_64-linux = self.nixosConfigurations.hut.pkgs; }; } -- 2.49.0 From ba2b74fd5abc00f97c8cb5175534b1d5347ca20e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 15 Sep 2023 11:58:10 +0200 Subject: [PATCH 186/472] Add bscpkgs and nixpkgs top level attributes Allows the evaluation of packages of the intermediate overlays. --- flake.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index d9fd95f..b74cd8e 100644 --- a/flake.nix +++ b/flake.nix @@ -26,6 +26,9 @@ in lake2 = mkConf "lake2"; }; - packages.x86_64-linux = self.nixosConfigurations.hut.pkgs; + packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { + bscpkgs = bscpkgs.packages.x86_64-linux; + nixpkgs = nixpkgs.legacyPackages.x86_64-linux; + }; }; } -- 2.49.0 From aa64e9ef24874987f7937f06032e16d9ca2e7789 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 15 Sep 2023 11:58:47 +0200 Subject: [PATCH 187/472] Remove bscpkgs from the registry and nixPath This is done to prevent accidental evaluations where the nixpkgs input of bscpkgs is still pointing to a different version than the one specified in the jungle flake. Instead use jungle#bscpkgs.X to get a package from bscpkgs.
--- m/common/main.nix | 2 -- 1 file changed, 2 deletions(-) diff --git a/m/common/main.nix b/m/common/main.nix index 5e7aa34..025a7c5 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -28,7 +28,6 @@ nix.nixPath = [ "nixpkgs=${nixpkgs}" - "bscpkgs=${bscpkgs}" "jungle=${theFlake.outPath}" ]; @@ -36,7 +35,6 @@ pkgs.writeText "global-registry.json" ''{"flakes":[],"version":2}''; nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.bscpkgs.flake = bscpkgs; nix.registry.jungle.flake = theFlake; environment.systemPackages = with pkgs; [ -- 2.49.0 From 70a9e855cf353fa4f0e56bf2fc1467f09bc06e16 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Sun, 17 Sep 2023 22:27:51 +0200 Subject: [PATCH 188/472] Enable direnv integration --- m/common/main.nix | 2 ++ m/common/zsh.nix | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/m/common/main.nix b/m/common/main.nix index 025a7c5..999f043 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -45,6 +45,8 @@ bsc.osumb ]; + programs.direnv.enable = true; + systemd.services."serial-getty@ttyS0" = { enable = true; wantedBy = [ "getty.target" ]; diff --git a/m/common/zsh.nix b/m/common/zsh.nix index 666e34b..5cfcb7f 100644 --- a/m/common/zsh.nix +++ b/m/common/zsh.nix @@ -2,7 +2,6 @@ { environment.systemPackages = with pkgs; [ - direnv zsh-completions nix-zsh-completions ]; -- 2.49.0 From 3040a803b292075e105d55f0183da347738489d7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 20 Sep 2023 18:26:48 +0200 Subject: [PATCH 189/472] Mount the hut nix store for SLURM jobs --- m/module/slurm-hut-nix-store.nix | 19 +++++++++++++++++++ m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + 3 files changed, 21 insertions(+) create mode 100644 m/module/slurm-hut-nix-store.nix diff --git a/m/module/slurm-hut-nix-store.nix b/m/module/slurm-hut-nix-store.nix new file mode 100644 index 0000000..2ec8e2f --- /dev/null +++ b/m/module/slurm-hut-nix-store.nix @@ -0,0 +1,19 @@ +{ ... 
}: + +{ + # Mount the hut nix store via NFS + fileSystems."/mnt/hut-nix-store" = { + device = "hut:/nix/store"; + fsType = "nfs"; + options = [ "ro" ]; + }; + + systemd.services.slurmd.serviceConfig = { + # When running a job, bind the hut store in /nix/store so the paths are + # available too. + # FIXME: This doesn't keep the programs in /run/current-system/sw/bin + # available in the store. Ideally they should be merged but the overlay FS + # doesn't work when the underlying directories change. + BindReadOnlyPaths = "/mnt/hut-nix-store:/nix/store"; + }; +} diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 5afda72..d21ccfd 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -5,6 +5,7 @@ ../common/main.nix ../module/ceph.nix ../module/slurm-firewall.nix + ../module/slurm-hut-nix-store.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 9636760..5fdabbe 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -5,6 +5,7 @@ ../common/main.nix ../module/ceph.nix ../module/slurm-firewall.nix + ../module/slurm-hut-nix-store.nix ]; # Select the this using the ID to avoid mismatches -- 2.49.0 From 9071a4de8bd0d3d33dfeb1a81655d0f1ff7c48ea Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 21 Sep 2023 21:34:18 +0200 Subject: [PATCH 190/472] Add prometheus-slurm-exporter package --- pkgs/overlay.nix | 2 ++ pkgs/slurm-exporter.nix | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 pkgs/slurm-exporter.nix diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 03208c5..47a4f3a 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -32,4 +32,6 @@ final: prev: lua = prev.lua5_4; fmt = prev.fmt_8; }) ceph ceph-client; + + prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; } diff --git a/pkgs/slurm-exporter.nix b/pkgs/slurm-exporter.nix new file mode 100644 index 0000000..49070cd --- 
/dev/null +++ b/pkgs/slurm-exporter.nix @@ -0,0 +1,22 @@ +{ buildGoModule, fetchFromGitHub, lib }: + +buildGoModule rec { + pname = "prometheus-slurm-exporter"; + version = "0.20"; + + src = fetchFromGitHub { + rev = version; + owner = "vpenso"; + repo = pname; + sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs="; + }; + + vendorSha256 = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI="; + doCheck = false; + + meta = with lib; { + description = "Prometheus SLURM Exporter"; + homepage = "https://github.com/vpenso/prometheus-slurm-exporter"; + platforms = platforms.linux; + }; +} -- 2.49.0 From 5f492ee1d73874e57594c7324f7798bf2cb41ea8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 21 Sep 2023 21:38:34 +0200 Subject: [PATCH 191/472] Enable slurm-exporter service --- m/hut/monitoring.nix | 3 +++ m/module/slurm-exporter.nix | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 m/module/slurm-exporter.nix diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index b455c1d..e84dbe9 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -1,6 +1,8 @@ { config, lib, ... }: { + imports = [ ../module/slurm-exporter.nix ]; + services.grafana = { enable = true; settings = { @@ -73,6 +75,7 @@ "127.0.0.1:9323" "127.0.0.1:9252" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" + "127.0.0.1:9341" # Slurm exporter ]; }]; } diff --git a/m/module/slurm-exporter.nix b/m/module/slurm-exporter.nix new file mode 100644 index 0000000..87b047d --- /dev/null +++ b/m/module/slurm-exporter.nix @@ -0,0 +1,35 @@ +{ config, lib, pkgs, ... 
}: + +# See also: https://github.com/NixOS/nixpkgs/pull/112010 + +with lib; + +{ + users = { + users."slurm-exporter" = { + description = "Prometheus slurm exporter service user"; + isSystemUser = true; + group = "slurm-exporter"; + }; + groups = { + "slurm-exporter" = {}; + }; + }; + + systemd.services."prometheus-slurm-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + User = "slurm-exporter"; + Group = "slurm-exporter"; + ExecStart = '' + ${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341" + ''; + Environment = [ "PATH=${pkgs.slurm}/bin" ]; + }; + }; +} -- 2.49.0 From 0ce79ed79e1fe453c2fb70a9a6ecfad449a67d0b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 21 Sep 2023 22:18:30 +0200 Subject: [PATCH 192/472] Set the SLURM_CONF variable --- m/module/slurm-exporter.nix | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/m/module/slurm-exporter.nix b/m/module/slurm-exporter.nix index 87b047d..55163c4 100644 --- a/m/module/slurm-exporter.nix +++ b/m/module/slurm-exporter.nix @@ -1,6 +1,7 @@ { config, lib, pkgs, ... }: # See also: https://github.com/NixOS/nixpkgs/pull/112010 +# And: https://github.com/NixOS/nixpkgs/pull/115839 with lib; @@ -29,7 +30,12 @@ with lib; ExecStart = '' ${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341" ''; - Environment = [ "PATH=${pkgs.slurm}/bin" ]; + Environment = [ + "PATH=${pkgs.slurm}/bin" + # We need to specify the slurm config to be able to talk to the slurmd + # daemon. 
+ "SLURM_CONF=${config.services.slurm.etcSlurm}/slurm.conf" + ]; }; }; } -- 2.49.0 From 8634a9e1337225fac887d643056440fe1a734ed6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 22 Sep 2023 10:13:06 +0200 Subject: [PATCH 193/472] Remove user/group when using DynamicUsers --- m/module/slurm-exporter.nix | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/m/module/slurm-exporter.nix b/m/module/slurm-exporter.nix index 55163c4..ad31f45 100644 --- a/m/module/slurm-exporter.nix +++ b/m/module/slurm-exporter.nix @@ -6,17 +6,6 @@ with lib; { - users = { - users."slurm-exporter" = { - description = "Prometheus slurm exporter service user"; - isSystemUser = true; - group = "slurm-exporter"; - }; - groups = { - "slurm-exporter" = {}; - }; - }; - systemd.services."prometheus-slurm-exporter" = { wantedBy = [ "multi-user.target" ]; after = [ "network.target" ]; @@ -25,8 +14,6 @@ with lib; PrivateTmp = mkDefault true; WorkingDirectory = mkDefault "/tmp"; DynamicUser = mkDefault true; - User = "slurm-exporter"; - Group = "slurm-exporter"; ExecStart = '' ${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341" ''; -- 2.49.0 From ebc5c4d84ffc3424df866501dcb5dbaa8d4ba635 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 22 Sep 2023 10:50:14 +0200 Subject: [PATCH 194/472] Allow anonymous access to grafana --- m/hut/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index e84dbe9..6c7d093 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -14,6 +14,7 @@ http_addr = "127.0.0.1"; }; feature_toggles.publicDashboards = true; + "auth.anonymous".enabled = true; }; }; -- 2.49.0 From 9874da566de57730192ddf00f8fb1d777b1b1753 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 28 Sep 2023 14:11:30 +0200 Subject: [PATCH 195/472] Add runner for gitlab.bsc.es --- m/hut/gitlab-runner.nix | 13 +++++++++++++ secrets/gitlab-bsc-es-token.age | 11 
+++++++++++ secrets/secrets.nix | 1 + 3 files changed, 25 insertions(+) create mode 100644 secrets/gitlab-bsc-es-token.age diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index d9fedb5..09c871b 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -2,6 +2,7 @@ { age.secrets.ovniToken.file = ../../secrets/ovni-token.age; + age.secrets.gitlabToken.file = ../../secrets/gitlab-bsc-es-token.age; age.secrets.nosvToken.file = ../../secrets/nosv-token.age; services.gitlab-runner = { @@ -20,6 +21,18 @@ SHELL = "${pkgs.bash}/bin/bash"; }; }; + gitlab-bsc-es-shell = { + registrationConfigFile = config.age.secrets.gitlabToken.path; + executor = "shell"; + tagList = [ "nix" "xeon" ]; + registrationFlags = [ + # Using space doesn't work, and causes it to misread the next flag + "--locked='false'" + ]; + environmentVariables = { + SHELL = "${pkgs.bash}/bin/bash"; + }; + }; ovni-docker = { registrationConfigFile = config.age.secrets.ovniToken.path; dockerImage = "debian:stable"; diff --git a/secrets/gitlab-bsc-es-token.age b/secrets/gitlab-bsc-es-token.age new file mode 100644 index 0000000..ffe7aaf --- /dev/null +++ b/secrets/gitlab-bsc-es-token.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg caTbx0NBmsTSmZH4HtBaxhsauWqWUDTesJqT08UsoEQ +8ND31xuco+H8d5SKg8xsCFRPVDhU4d8UKwV1BnmKVjQ +-> ssh-ed25519 CAWG4Q 4ETYuhCwHHECkut4DWDknMMgpAvFqtzLWVC2Wi2L8FM +BGMvRnAfd8qZG5hzLefmk32FkGvwzE9pqBUyx4JY0co +-> ssh-ed25519 MSF3dg hj5QL4ZfylN8/W/MXQHvVqtI7mRvlQOYr8HsaQEmPB0 +kvB7sljmmkswSGZDQnrwdTbTsN78EAwH3pz1pPe0Hu0 +-> )Q-grease vHF} [8p1> @7z;C"/ +tgSUKFyyrf2jLXZp+pakigwB2fRO/WFj2Qnt1aPjtVPEK92JbJ4 +--- xzM0AhV4gTQE0Q7inJNo9vFj+crJQxWeI7u9pl7bqAI +6nGJ0B7Fbٽ2L]2zl&eKx9SWNV"MfKHUC:1b;9StDuѧϢ̟f71I(d \ No newline at end of file diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 95b43ac..991cdd5 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -6,6 +6,7 @@ let safe = keys.hostGroup.safe ++ adminsKeys; in { + 
"gitlab-bsc-es-token.age".publicKeys = hut; "ovni-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; -- 2.49.0 From fbe238f5b6b744ad5f2240eda96e0d4a4d06ba0e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 28 Sep 2023 14:14:41 +0200 Subject: [PATCH 196/472] Temporarily disable pm runners --- m/hut/gitlab-runner.nix | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 09c871b..4d85848 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -9,18 +9,6 @@ enable = true; settings.concurrent = 5; services = { - ovni-shell = { - registrationConfigFile = config.age.secrets.ovniToken.path; - executor = "shell"; - tagList = [ "nix" "xeon" ]; - registrationFlags = [ - # Using space doesn't work, and causes it to misread the next flag - "--locked='false'" - ]; - environmentVariables = { - SHELL = "${pkgs.bash}/bin/bash"; - }; - }; gitlab-bsc-es-shell = { registrationConfigFile = config.age.secrets.gitlabToken.path; executor = "shell"; @@ -33,32 +21,6 @@ SHELL = "${pkgs.bash}/bin/bash"; }; }; - ovni-docker = { - registrationConfigFile = config.age.secrets.ovniToken.path; - dockerImage = "debian:stable"; - tagList = [ "docker" "xeon" ]; - registrationFlags = [ - "--locked='false'" - "--docker-network-mode host" - ]; - environmentVariables = { - https_proxy = "http://localhost:23080"; - http_proxy = "http://localhost:23080"; - }; - }; - nosv-docker = { - registrationConfigFile = config.age.secrets.nosvToken.path; - dockerImage = "debian:stable"; - tagList = [ "docker" "xeon" ]; - registrationFlags = [ - "--docker-network-mode host" - "--docker-cpus 56" - ]; - environmentVariables = { - https_proxy = "http://localhost:23080"; - http_proxy = "http://localhost:23080"; - }; - }; }; }; -- 2.49.0 From fc9285f89d2540fd8fd4f99533240bbbf1b54669 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Oct 2023 08:58:07 
+0200 Subject: [PATCH 197/472] Monitor PM webpage via blackbox --- m/hut/blackbox.yml | 160 +++++++++++++++++++++++++++++++++++++++++++ m/hut/monitoring.nix | 34 +++++++++ 2 files changed, 194 insertions(+) create mode 100644 m/hut/blackbox.yml diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml new file mode 100644 index 0000000..232fb89 --- /dev/null +++ b/m/hut/blackbox.yml @@ -0,0 +1,160 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_status_codes: [] # Defaults to 2xx + method: GET + http_with_proxy: + prober: http + http: + proxy_url: "http://127.0.0.1:3128" + skip_resolve_phase_with_proxy: true + http_with_proxy_and_headers: + prober: http + http: + proxy_url: "http://127.0.0.1:3128" + proxy_connect_header: + Proxy-Authorization: + - Bearer token + http_post_2xx: + prober: http + timeout: 5s + http: + method: POST + headers: + Content-Type: application/json + body: '{}' + http_post_body_file: + prober: http + timeout: 5s + http: + method: POST + body_file: "/files/body.txt" + http_basic_auth_example: + prober: http + timeout: 5s + http: + method: POST + headers: + Host: "login.example.com" + basic_auth: + username: "username" + password: "mysecret" + http_2xx_oauth_client_credentials: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2"] + follow_redirects: true + preferred_ip_protocol: "ip4" + valid_status_codes: + - 200 + - 201 + oauth2: + client_id: "client_id" + client_secret: "client_secret" + token_url: "https://api.example.com/token" + endpoint_params: + grant_type: "client_credentials" + http_custom_ca_example: + prober: http + http: + method: GET + tls_config: + ca_file: "/certs/my_cert.crt" + http_gzip: + prober: http + http: + method: GET + compression: gzip + http_gzip_with_accept_encoding: + prober: http + http: + method: GET + compression: gzip + headers: + Accept-Encoding: gzip + tls_connect: + prober: tcp + timeout: 5s + tcp: + tls: true + tcp_connect_example: + prober: tcp + timeout: 5s + 
imap_starttls: + prober: tcp + timeout: 5s + tcp: + query_response: + - expect: "OK.*STARTTLS" + - send: ". STARTTLS" + - expect: "OK" + - starttls: true + - send: ". capability" + - expect: "CAPABILITY IMAP4rev1" + smtp_starttls: + prober: tcp + timeout: 5s + tcp: + query_response: + - expect: "^220 ([^ ]+) ESMTP (.+)$" + - send: "EHLO prober\r" + - expect: "^250-STARTTLS" + - send: "STARTTLS\r" + - expect: "^220" + - starttls: true + - send: "EHLO prober\r" + - expect: "^250-AUTH" + - send: "QUIT\r" + irc_banner_example: + prober: tcp + timeout: 5s + tcp: + query_response: + - send: "NICK prober" + - send: "USER prober prober prober :prober" + - expect: "PING :([^ ]+)" + send: "PONG ${1}" + - expect: "^:[^ ]+ 001" + icmp_example: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" + source_ip_address: "127.0.0.1" + dns_udp_example: + prober: dns + timeout: 5s + dns: + query_name: "www.prometheus.io" + query_type: "A" + valid_rcodes: + - NOERROR + validate_answer_rrs: + fail_if_matches_regexp: + - ".*127.0.0.1" + fail_if_all_match_regexp: + - ".*127.0.0.1" + fail_if_not_matches_regexp: + - "www.prometheus.io.\t300\tIN\tA\t127.0.0.1" + fail_if_none_matches_regexp: + - "127.0.0.1" + validate_authority_rrs: + fail_if_matches_regexp: + - ".*127.0.0.1" + validate_additional_rrs: + fail_if_matches_regexp: + - ".*127.0.0.1" + dns_soa: + prober: dns + dns: + query_name: "prometheus.io" + query_type: "SOA" + dns_tcp_example: + prober: dns + dns: + transport_protocol: "tcp" # defaults to "udp" + preferred_ip_protocol: "ip4" # defaults to "ip6" + query_name: "www.prometheus.io" diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 6c7d093..49ebf7c 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -64,6 +64,11 @@ enable = true; listenAddress = "127.0.0.1"; }; + blackbox = { + enable = true; + listenAddress = "127.0.0.1"; + configFile = ./blackbox.yml; + }; }; scrapeConfigs = [ @@ -77,6 +82,7 @@ "127.0.0.1:9252" "127.0.0.1:${toString 
config.services.prometheus.exporters.smartctl.port}" "127.0.0.1:9341" # Slurm exporter + "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }]; } @@ -90,6 +96,34 @@ ]; }]; } + { + job_name = "blackbox"; + metrics_path = "/probe"; + params = { module = [ "http_2xx" ]; }; + static_configs = [{ + targets = [ + "https://pm.bsc.es/" + "https://jungle.bsc.es/" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; + } + ]; + } { # Scrape the IPMI info of the hosts remotely via LAN job_name = "ipmi-lan"; -- 2.49.0 From e4080cf931d5d1b6d8ff6d490ceadb8cd576137f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Oct 2023 09:45:13 +0200 Subject: [PATCH 198/472] Monitor gitlab.bsc.es too --- m/hut/blackbox.yml | 1 + m/hut/monitoring.nix | 1 + 2 files changed, 2 insertions(+) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index 232fb89..c32d73a 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -3,6 +3,7 @@ modules: prober: http timeout: 5s http: + follow_redirects: true valid_status_codes: [] # Defaults to 2xx method: GET http_with_proxy: diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 49ebf7c..d5d9d03 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -104,6 +104,7 @@ targets = [ "https://pm.bsc.es/" "https://jungle.bsc.es/" + "https://gitlab.bsc.es/" ]; }]; relabel_configs = [ -- 2.49.0 From 425dca3e0026cdda8de11577ca27c599b72c66a3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 4 Oct 2023 07:55:26 +0200 Subject: [PATCH 
199/472] Add docker runner too --- m/hut/gitlab-runner.nix | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 4d85848..fd12123 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -21,6 +21,19 @@ SHELL = "${pkgs.bash}/bin/bash"; }; }; + gitlab-bsc-es-docker = { + registrationConfigFile = config.age.secrets.gitlabToken.path; + dockerImage = "debian:stable"; + tagList = [ "docker" "xeon" ]; + registrationFlags = [ + "--locked='false'" + "--docker-network-mode host" + ]; + environmentVariables = { + https_proxy = "http://localhost:23080"; + http_proxy = "http://localhost:23080"; + }; + }; }; }; -- 2.49.0 From 472f4b0334e5f914d731830f9541287b88b1c10d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 4 Oct 2023 08:19:09 +0200 Subject: [PATCH 200/472] Don't log SLURM connection attempts from ssfhead --- m/common/net.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/net.nix b/m/common/net.nix index d90f8ba..1c9c569 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -23,7 +23,7 @@ allowedTCPPorts = [ 22 ]; extraCommands = '' # Prevent ssfhead from contacting our slurmd daemon - iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse + iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse # But accept traffic to slurm ports from any other node in the subnet iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept # We also need to open the srun port range -- 2.49.0 From f1486b84c1de5048a30ad03a2afccc560548e471 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Oct 2023 15:34:06 +0200 Subject: [PATCH 201/472] Make blackbox exporter use the proxy By default it was trying to reach the targets using the default gateway, but since the electrical cut of 2023-10-20, the login node has not enabled forwarding again. So better if we don't rely on it. 
Reviewed-By: Aleix Roca Nonell --- m/hut/blackbox.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index c32d73a..83de003 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -3,6 +3,8 @@ modules: prober: http timeout: 5s http: + proxy_url: "http://127.0.0.1:23080" + skip_resolve_phase_with_proxy: true follow_redirects: true valid_status_codes: [] # Defaults to 2xx method: GET -- 2.49.0 From e5d85c1b38a045e3d2493e97063aa4b805273b25 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Oct 2023 16:04:15 +0200 Subject: [PATCH 202/472] Enable proxy for Grafana too The alerts need to contact the slack endpoint, so we add the proxy environment variables to the grafana systemd service. Reviewed-By: Aleix Roca Nonell --- m/hut/monitoring.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index d5d9d03..304590f 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -18,6 +18,9 @@ }; }; + # Make grafana alerts also use the proxy + systemd.services.grafana.environment = config.networking.proxy.envVars; + services.prometheus = { enable = true; port = 9001; -- 2.49.0 From 54c2bd119f77fcd418455e85c954d5fd74d6e624 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 24 Oct 2023 11:49:42 +0200 Subject: [PATCH 203/472] Add ICMP probes These probes check if we can reach several targets via ICMP, which is not proxied, so they can be used to see if ICMP forwarding is working in the login node. In particular, we test if we can reach the Google (8.8.8.8) and Cloudflare (1.1.1.1) DNS servers, the BSC gateway which responds to ping only from the intranet and the login node (ssfhead). 
Reviewed-By: Aleix Roca Nonell --- m/hut/blackbox.yml | 3 +-- m/hut/monitoring.nix | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index 83de003..e69acd3 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -121,12 +121,11 @@ modules: - expect: "PING :([^ ]+)" send: "PONG ${1}" - expect: "^:[^ ]+ 001" - icmp_example: + icmp: prober: icmp timeout: 5s icmp: preferred_ip_protocol: "ip4" - source_ip_address: "127.0.0.1" dns_udp_example: prober: dns timeout: 5s diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 304590f..61fa22c 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -100,7 +100,7 @@ }]; } { - job_name = "blackbox"; + job_name = "blackbox-http"; metrics_path = "/probe"; params = { module = [ "http_2xx" ]; }; static_configs = [{ @@ -128,6 +128,36 @@ } ]; } + { + job_name = "blackbox-icmp"; + metrics_path = "/probe"; + params = { module = [ "icmp" ]; }; + static_configs = [{ + targets = [ + "1.1.1.1" + "8.8.8.8" + "ssfhead" + "gw.bsc.es" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; + } + ]; + } { # Scrape the IPMI info of the hosts remotely via LAN job_name = "ipmi-lan"; -- 2.49.0 From 19e195b894c430dccc32ec2822314daa19461d66 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 26 Oct 2023 12:36:06 +0200 Subject: [PATCH 204/472] Monitor anella instead of gw.bsc.es The target gw.bsc.es doesn't reply to our ICMP probes from hut. 
However, the anella hop in the tracepath is a good candidate to identify cuts between the login and the provider and between the provider and external hosts like Google or Cloudflare DNS. Reviewed-By: Aleix Roca Nonell --- m/hut/monitoring.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 61fa22c..b0761c3 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -137,7 +137,7 @@ "1.1.1.1" "8.8.8.8" "ssfhead" - "gw.bsc.es" + "anella-bsc.cesca.cat" ]; }]; relabel_configs = [ -- 2.49.0 From 84c4b6b81cb6cf6ed8f132e44d8ac06b6523fbb1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 6 Nov 2023 17:50:38 +0100 Subject: [PATCH 205/472] Switch bscpkgs URL to sourcehut Reviewed-by: Aleix Roca Nonell --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index b74cd8e..807721a 100644 --- a/flake.nix +++ b/flake.nix @@ -3,7 +3,7 @@ nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; - bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; + bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; }; -- 2.49.0 From db98b1f698a731c04820989314077de5afc7d1fe Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 6 Nov 2023 17:54:14 +0100 Subject: [PATCH 206/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'bscpkgs': 'git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=3a4062ac04be6263c64a481420d8e768c2521b80' (2023-09-14) → 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=f605f8e5e4a1f392589f1ea2b9ffe2074f72a538' (2023-10-31) Reviewed-by: Aleix Roca Nonell --- flake.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flake.lock 
b/flake.lock index 3c654b9..06bc967 100644 --- a/flake.lock +++ b/flake.lock @@ -29,17 +29,17 @@ ] }, "locked": { - "lastModified": 1694708510, - "narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=", + "lastModified": 1698767575, + "narHash": "sha256-y8v1YhB9vzBDsFPFu8BILnH9K71SUoCqfCLsGxI5yC4=", "ref": "refs/heads/master", - "rev": "3a4062ac04be6263c64a481420d8e768c2521b80", - "revCount": 862, + "rev": "f605f8e5e4a1f392589f1ea2b9ffe2074f72a538", + "revCount": 899, "type": "git", - "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" + "url": "https://git.sr.ht/~rodarima/bscpkgs" }, "original": { "type": "git", - "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" + "url": "https://git.sr.ht/~rodarima/bscpkgs" } }, "darwin": { -- 2.49.0 From 0d9c99a24ea4d676fe1d3c617377a00d19aec22f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 6 Nov 2023 23:03:56 +0100 Subject: [PATCH 207/472] BSC packages are no longer in bsc attribute Reviewed-by: Aleix Roca Nonell --- m/common/main.nix | 2 +- pkgs/overlay.nix | 44 +++++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/m/common/main.nix b/m/common/main.nix index 999f043..2844b3f 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -42,7 +42,7 @@ nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree ncdu config.boot.kernelPackages.perf ldns # From bsckgs overlay - bsc.osumb + osumb ]; programs.direnv.enable = true; diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 47a4f3a..efdeada 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -1,29 +1,27 @@ final: prev: { - bsc = prev.bsc.extend (bscFinal: bscPrev: { - # Set MPICH as default - mpi = bscFinal.mpich; + # Set MPICH as default + mpi = final.mpich; - # Configure the network for MPICH - mpich = with final; prev.mpich.overrideAttrs (old: { - buildInput = old.buildInputs ++ [ - libfabric - pmix - ]; - configureFlags = [ - "--enable-shared" - "--enable-sharedlib" - 
"--with-pm=no" - "--with-device=ch4:ofi" - "--with-pmi=pmix" - "--with-pmix=${final.pmix}" - "--with-libfabric=${final.libfabric}" - "--enable-g=log" - ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ - "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 - "FCFLAGS=-fallow-argument-mismatch" - ]; - }); + # Configure the network for MPICH + mpich = with final; prev.mpich.overrideAttrs (old: { + buildInput = old.buildInputs ++ [ + libfabric + pmix + ]; + configureFlags = [ + "--enable-shared" + "--enable-sharedlib" + "--with-pm=no" + "--with-device=ch4:ofi" + "--with-pmi=pmix" + "--with-pmix=${final.pmix}" + "--with-libfabric=${final.libfabric}" + "--enable-g=log" + ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ + "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 + "FCFLAGS=-fallow-argument-mismatch" + ]; }); # Update ceph to 18.2.0 until it lands in nixpkgs, see: -- 2.49.0 From bd83ca53ab1eff3754faa764f619a0b5c9131491 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 20 Nov 2023 12:37:50 +0100 Subject: [PATCH 208/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'agenix': 'github:ryantm/agenix/d8c973fd228949736dedf61b7f8cc1ece3236792' (2023-07-24) → 'github:ryantm/agenix/daf42cb35b2dc614d1551e37f96406e4c4a2d3e4' (2023-10-08) • Updated input 'bscpkgs': 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=f605f8e5e4a1f392589f1ea2b9ffe2074f72a538' (2023-10-31) → 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=e148de50d68b3eeafc3389b331cf042075971c4b' (2023-11-22) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/e56990880811a451abd32515698c712788be5720' (2023-09-02) → 'github:NixOS/nixpkgs/e4ad989506ec7d71f7302cc3067abd82730a4beb' (2023-11-19) Reviewed-by: Aleix Roca Nonell --- flake.lock | 20 ++++++++++---------- 1 file changed, 
10 insertions(+), 10 deletions(-) diff --git a/flake.lock b/flake.lock index 06bc967..d807c7d 100644 --- a/flake.lock +++ b/flake.lock @@ -9,11 +9,11 @@ ] }, "locked": { - "lastModified": 1690228878, - "narHash": "sha256-9Xe7JV0krp4RJC9W9W9WutZVlw6BlHTFMiUP/k48LQY=", + "lastModified": 1696775529, + "narHash": "sha256-TYlE4B0ktPtlJJF9IFxTWrEeq+XKG8Ny0gc2FGEAdj0=", "owner": "ryantm", "repo": "agenix", - "rev": "d8c973fd228949736dedf61b7f8cc1ece3236792", + "rev": "daf42cb35b2dc614d1551e37f96406e4c4a2d3e4", "type": "github" }, "original": { @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1698767575, - "narHash": "sha256-y8v1YhB9vzBDsFPFu8BILnH9K71SUoCqfCLsGxI5yC4=", + "lastModified": 1700663915, + "narHash": "sha256-2NiVhUgOs26nVqnypx8Er3sjyHD2cApjYV9iYrGY9xE=", "ref": "refs/heads/master", - "rev": "f605f8e5e4a1f392589f1ea2b9ffe2074f72a538", - "revCount": 899, + "rev": "e148de50d68b3eeafc3389b331cf042075971c4b", + "revCount": 912, "type": "git", "url": "https://git.sr.ht/~rodarima/bscpkgs" }, @@ -87,11 +87,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1693663421, - "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=", + "lastModified": 1700390070, + "narHash": "sha256-de9KYi8rSJpqvBfNwscWdalIJXPo8NjdIZcEJum1mH0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "e56990880811a451abd32515698c712788be5720", + "rev": "e4ad989506ec7d71f7302cc3067abd82730a4beb", "type": "github" }, "original": { -- 2.49.0 From 7afe7344acfc0cb97e3f62e82c6333e39140ce8a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Nov 2023 00:02:26 +0100 Subject: [PATCH 209/472] Remove old Ceph package overlay The Ceph package is now integrated in upstream nixpkgs. 
Reviewed-by: Aleix Roca Nonell --- pkgs/ceph.nix | 405 ----------------------------------------------- pkgs/overlay.nix | 7 - 2 files changed, 412 deletions(-) delete mode 100644 pkgs/ceph.nix diff --git a/pkgs/ceph.nix b/pkgs/ceph.nix deleted file mode 100644 index 5247862..0000000 --- a/pkgs/ceph.nix +++ /dev/null @@ -1,405 +0,0 @@ -{ lib -, stdenv -, runCommand -, fetchurl -, fetchFromGitHub -, fetchPypi - -# Build time -, cmake -, ensureNewerSourcesHook -, fmt -, git -, makeWrapper -, nasm -, pkg-config -, which - -# Tests -, nixosTests - -# Runtime dependencies -, arrow-cpp -, babeltrace -, boost179 -, bzip2 -, cryptsetup -, cunit -, doxygen -, gperf -, graphviz -, gtest -, icu -, libcap -, libcap_ng -, libnl -, libxml2 -, lttng-ust -, lua -, lz4 -, oath-toolkit -, openldap -, python310 -, rdkafka -, rocksdb -, snappy -, sqlite -, utf8proc -, zlib -, zstd - -# Optional Dependencies -, curl ? null -, expat ? null -, fuse ? null -, libatomic_ops ? null -, libedit ? null -, libs3 ? null -, yasm ? null - -# Mallocs -, gperftools ? null -, jemalloc ? null - -# Crypto Dependencies -, cryptopp ? null -, nspr ? null -, nss ? null - -# Linux Only Dependencies -, linuxHeaders -, util-linux -, libuuid -, udev -, keyutils -, rdma-core -, rabbitmq-c -, libaio ? null -, libxfs ? null -, liburing ? null -, zfs ? null -, ... 
-}: - -# We must have one crypto library -assert cryptopp != null || (nss != null && nspr != null); - -let - shouldUsePkg = pkg: if pkg != null && pkg.meta.available then pkg else null; - - optYasm = shouldUsePkg yasm; - optExpat = shouldUsePkg expat; - optCurl = shouldUsePkg curl; - optFuse = shouldUsePkg fuse; - optLibedit = shouldUsePkg libedit; - optLibatomic_ops = shouldUsePkg libatomic_ops; - optLibs3 = shouldUsePkg libs3; - - optJemalloc = shouldUsePkg jemalloc; - optGperftools = shouldUsePkg gperftools; - - optCryptopp = shouldUsePkg cryptopp; - optNss = shouldUsePkg nss; - optNspr = shouldUsePkg nspr; - - optLibaio = shouldUsePkg libaio; - optLibxfs = shouldUsePkg libxfs; - optZfs = shouldUsePkg zfs; - - # Downgrade rocksdb, 7.10 breaks ceph - rocksdb' = rocksdb.overrideAttrs { - version = "7.9.2"; - src = fetchFromGitHub { - owner = "facebook"; - repo = "rocksdb"; - rev = "refs/tags/v7.9.2"; - hash = "sha256-5P7IqJ14EZzDkbjaBvbix04ceGGdlWBuVFH/5dpD5VM="; - }; - }; - - hasRadosgw = optExpat != null && optCurl != null && optLibedit != null; - - # Malloc implementation (can be jemalloc, tcmalloc or null) - malloc = if optJemalloc != null then optJemalloc else optGperftools; - - # We prefer nss over cryptopp - cryptoStr = if optNss != null && optNspr != null then "nss" else - if optCryptopp != null then "cryptopp" else "none"; - - cryptoLibsMap = { - nss = [ optNss optNspr ]; - cryptopp = [ optCryptopp ]; - none = [ ]; - }; - - getMeta = description: with lib; { - homepage = "https://ceph.io/en/"; - inherit description; - license = with licenses; [ lgpl21 gpl2 bsd3 mit publicDomain ]; - maintainers = with maintainers; [ adev ak johanot krav ]; - platforms = [ "x86_64-linux" "aarch64-linux" ]; - }; - - ceph-common = with python.pkgs; buildPythonPackage { - pname = "ceph-common"; - inherit src version; - - sourceRoot = "ceph-${version}/src/python-common"; - - propagatedBuildInputs = [ - pyyaml - ]; - - nativeCheckInputs = [ - pytestCheckHook - ]; - - 
disabledTests = [ - # requires network access - "test_valid_addr" - ]; - - meta = getMeta "Ceph common module for code shared by manager modules"; - }; - - # Watch out for python <> boost compatibility - python = python310.override { - packageOverrides = self: super: { - sqlalchemy = super.sqlalchemy.overridePythonAttrs rec { - version = "1.4.46"; - src = fetchPypi { - pname = "SQLAlchemy"; - inherit version; - hash = "sha256-aRO4JH2KKS74MVFipRkx4rQM6RaB8bbxj2lwRSAMSjA="; - }; - disabledTestPaths = [ - "test/aaa_profiling" - "test/ext/mypy" - ]; - }; - }; - }; - - boost = boost179.override { - enablePython = true; - inherit python; - }; - - # TODO: split this off in build and runtime environment - ceph-python-env = python.withPackages (ps: with ps; [ - ceph-common - - # build time - cython - - # debian/control - bcrypt - cherrypy - influxdb - jinja2 - kubernetes - natsort - numpy - pecan - prettytable - pyjwt - pyopenssl - python-dateutil - pyyaml - requests - routes - scikit-learn - scipy - setuptools - sphinx - virtualenv - werkzeug - - # src/pybind/mgr/requirements-required.txt - cryptography - jsonpatch - - # src/tools/cephfs/shell/setup.py - cmd2 - colorama - ]); - inherit (ceph-python-env.python) sitePackages; - - version = "18.2.0"; - src = fetchurl { - url = "https://download.ceph.com/tarballs/ceph-${version}.tar.gz"; - hash = "sha256:0k9nl6xi5brva51rr14m7ig27mmmd7vrpchcmqc40q3c2khn6ns9"; - }; -in rec { - ceph = stdenv.mkDerivation { - pname = "ceph"; - inherit src version; - - nativeBuildInputs = [ - cmake - fmt - git - makeWrapper - nasm - pkg-config - python - python.pkgs.python # for the toPythonPath function - python.pkgs.wrapPython - which - (ensureNewerSourcesHook { year = "1980"; }) - # for building docs/man-pages presumably - doxygen - graphviz - ]; - - enableParallelBuilding = true; - - buildInputs = cryptoLibsMap.${cryptoStr} ++ [ - arrow-cpp - babeltrace - boost - bzip2 - ceph-python-env - cryptsetup - cunit - gperf - gtest - icu - libcap - 
libnl - libxml2 - lttng-ust - lua - lz4 - malloc - oath-toolkit - openldap - optLibatomic_ops - optLibs3 - optYasm - rdkafka - rocksdb' - snappy - sqlite - utf8proc - zlib - zstd - ] ++ lib.optionals stdenv.isLinux [ - keyutils - libcap_ng - liburing - libuuid - linuxHeaders - optLibaio - optLibxfs - optZfs - rabbitmq-c - rdma-core - udev - util-linux - ] ++ lib.optionals hasRadosgw [ - optCurl - optExpat - optFuse - optLibedit - ]; - - pythonPath = [ ceph-python-env "${placeholder "out"}/${ceph-python-env.sitePackages}" ]; - - preConfigure ='' - substituteInPlace src/common/module.c --replace "/sbin/modinfo" "modinfo" - substituteInPlace src/common/module.c --replace "/sbin/modprobe" "modprobe" - substituteInPlace src/common/module.c --replace "/bin/grep" "grep" - - # install target needs to be in PYTHONPATH for "*.pth support" check to succeed - # set PYTHONPATH, so the build system doesn't silently skip installing ceph-volume and others - export PYTHONPATH=${ceph-python-env}/${sitePackages}:$lib/${sitePackages}:$out/${sitePackages} - patchShebangs src/ - ''; - - cmakeFlags = [ - "-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib" - - "-DWITH_CEPHFS_SHELL:BOOL=ON" - "-DWITH_SYSTEMD:BOOL=OFF" - # `WITH_JAEGER` requires `thrift` as a depenedncy (fine), but the build fails with: - # CMake Error at src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-Release.cmake:49 (message): - # Command failed: 2 - # - # 'make' 'opentelemetry_trace' 'opentelemetry_exporter_jaeger_trace' - # - # See also - # - # /build/ceph-18.2.0/build/src/opentelemetry-cpp/src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-*.log - # and that file contains: - # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc: In member function 'virtual void opentelemetry::v1::exporter::jaeger::TUDPTransport::close()': - # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc:71:7: error: '::close' has not been declared; did you 
mean 'pclose'? - # 71 | ::THRIFT_CLOSESOCKET(socket_); - # | ^~~~~~~~~~~~~~~~~~ - # Looks like `close()` is somehow not included. - # But the relevant code is already removed in `open-telemetry` 1.10: https://github.com/open-telemetry/opentelemetry-cpp/pull/2031 - # So it's proably not worth trying to fix that for this Ceph version, - # and instead just disable Ceph's Jaeger support. - "-DWITH_JAEGER:BOOL=OFF" - "-DWITH_TESTS:BOOL=OFF" - - # Use our own libraries, where possible - "-DWITH_SYSTEM_ARROW:BOOL=ON" # Only used if other options enable Arrow support. - "-DWITH_SYSTEM_BOOST:BOOL=ON" - "-DWITH_SYSTEM_GTEST:BOOL=ON" - "-DWITH_SYSTEM_ROCKSDB:BOOL=ON" - "-DWITH_SYSTEM_UTF8PROC:BOOL=ON" - "-DWITH_SYSTEM_ZSTD:BOOL=ON" - - # TODO breaks with sandbox, tries to download stuff with npm - "-DWITH_MGR_DASHBOARD_FRONTEND:BOOL=OFF" - # WITH_XFS has been set default ON from Ceph 16, keeping it optional in nixpkgs for now - ''-DWITH_XFS=${if optLibxfs != null then "ON" else "OFF"}'' - ] ++ lib.optional stdenv.isLinux "-DWITH_SYSTEM_LIBURING=ON"; - - postFixup = '' - wrapPythonPrograms - wrapProgram $out/bin/ceph-mgr --prefix PYTHONPATH ":" "$(toPythonPath ${placeholder "out"}):$(toPythonPath ${ceph-python-env})" - - # Test that ceph-volume exists since the build system has a tendency to - # silently drop it with misconfigurations. - test -f $out/bin/ceph-volume - ''; - - outputs = [ "out" "lib" "dev" "doc" "man" ]; - - doCheck = false; # uses pip to install things from the internet - - # Takes 7+h to build with 2 cores. 
- requiredSystemFeatures = [ "big-parallel" ]; - - meta = getMeta "Distributed storage system"; - - passthru = { - inherit version; - tests = { - inherit (nixosTests) - ceph-multi-node - ceph-single-node - ceph-single-node-bluestore; - }; - }; - }; - - ceph-client = runCommand "ceph-client-${version}" { - meta = getMeta "Tools needed to mount Ceph's RADOS Block Devices/Cephfs"; - } '' - mkdir -p $out/{bin,etc,${sitePackages},share/bash-completion/completions} - cp -r ${ceph}/bin/{ceph,.ceph-wrapped,rados,rbd,rbdmap} $out/bin - cp -r ${ceph}/bin/ceph-{authtool,conf,dencoder,rbdnamer,syn} $out/bin - cp -r ${ceph}/bin/rbd-replay* $out/bin - cp -r ${ceph}/sbin/mount.ceph $out/bin - cp -r ${ceph}/sbin/mount.fuse.ceph $out/bin - ln -s bin $out/sbin - cp -r ${ceph}/${sitePackages}/* $out/${sitePackages} - cp -r ${ceph}/etc/bash_completion.d $out/share/bash-completion/completions - # wrapPythonPrograms modifies .ceph-wrapped, so lets just update its paths - substituteInPlace $out/bin/ceph --replace ${ceph} $out - substituteInPlace $out/bin/.ceph-wrapped --replace ${ceph} $out - ''; -} diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index efdeada..f31a598 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -24,12 +24,5 @@ final: prev: ]; }); - # Update ceph to 18.2.0 until it lands in nixpkgs, see: - # https://github.com/NixOS/nixpkgs/pull/247849 - inherit (prev.callPackage ./ceph.nix { - lua = prev.lua5_4; - fmt = prev.fmt_8; - }) ceph ceph-client; - prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; } -- 2.49.0 From cfe0c0e6e6f8001d94b5f6ba0735a0ae4bbabcc6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 20 Nov 2023 12:40:24 +0100 Subject: [PATCH 210/472] Fix warning in slurm exporter using vendorHash Reviewed-by: Aleix Roca Nonell --- pkgs/slurm-exporter.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/slurm-exporter.nix b/pkgs/slurm-exporter.nix index 49070cd..9cfc972 100644 --- a/pkgs/slurm-exporter.nix 
+++ b/pkgs/slurm-exporter.nix @@ -11,7 +11,7 @@ buildGoModule rec { sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs="; }; - vendorSha256 = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI="; + vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI="; doCheck = false; meta = with lib; { -- 2.49.0 From 5234ca32fd3b33a888a4a2be91e4e782597bd584 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 20 Nov 2023 12:57:31 +0100 Subject: [PATCH 211/472] Remove complete ceph package from hut Only the ceph-client is needed. Reviewed-by: Aleix Roca Nonell --- m/module/ceph.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/module/ceph.nix b/m/module/ceph.nix index ff3fd22..cf1217f 100644 --- a/m/module/ceph.nix +++ b/m/module/ceph.nix @@ -3,7 +3,6 @@ # Mounts the /ceph filesystem at boot { environment.systemPackages = with pkgs; [ - ceph ceph-client fio # For benchmarks ]; -- 2.49.0 From fe1d3fbb80c45b4797d519ad87b2cb5c51587a21 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 23 Nov 2023 12:39:43 +0100 Subject: [PATCH 212/472] Enable runners for pm.bsc.es/gitlab too The old runners for the PM gitlab were disabled in configuration in the last outage, but they remained working until we reboot the node. With this change we enable the runners for both PM and gitlab.bsc.es. 
Reviewed-by: Aleix Roca Nonell --- m/hut/gitlab-runner.nix | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index fd12123..3cbe4f6 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -8,9 +8,8 @@ services.gitlab-runner = { enable = true; settings.concurrent = 5; - services = { - gitlab-bsc-es-shell = { - registrationConfigFile = config.age.secrets.gitlabToken.path; + services = let + common-shell = { executor = "shell"; tagList = [ "nix" "xeon" ]; registrationFlags = [ @@ -21,8 +20,7 @@ SHELL = "${pkgs.bash}/bin/bash"; }; }; - gitlab-bsc-es-docker = { - registrationConfigFile = config.age.secrets.gitlabToken.path; + common-docker = { dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; registrationFlags = [ @@ -34,6 +32,21 @@ http_proxy = "http://localhost:23080"; }; }; + in { + # For gitlab.bsc.es + gitlab-bsc-es-shell = common-shell // { + registrationConfigFile = config.age.secrets.gitlabToken.path; + }; + gitlab-bsc-es-docker = common-docker // { + registrationConfigFile = config.age.secrets.gitlabToken.path; + }; + # For pm.bsc.es/gitlab + gitlab-pm-shell = common-shell // { + registrationConfigFile = config.age.secrets.ovniToken.path; + }; + gitlab-pm-docker = common-docker // { + registrationConfigFile = config.age.secrets.ovniToken.path; + }; }; }; -- 2.49.0 From ed887b04126caf56f326dc02a565f39b93fe8fe4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 21 Nov 2023 23:56:55 +0100 Subject: [PATCH 213/472] Use tmpfs in /tmp The /tmp directory was using the SSD disk which is not erased across boots. Nix will use /tmp to perform the builds, so we want it to be as fast as possible. In general, all the machines have enough space to handle large builds like LLVM. 
Reviewed-by: Aleix Roca Nonell --- m/common/fs.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/common/fs.nix b/m/common/fs.nix index 10339d0..c6fea28 100644 --- a/m/common/fs.nix +++ b/m/common/fs.nix @@ -25,4 +25,7 @@ device = "none"; fsType = "tracefs"; }; + + # Mount a tmpfs into /tmp + boot.tmp.useTmpfs = true; } -- 2.49.0 From c564d945d46b1c6d31997d762e8de832af03afc6 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 1 Dec 2023 19:57:04 +0100 Subject: [PATCH 214/472] Enable nixseparatedebuginfod module The module is only enabled on Hut and Eudy because we noticed activity on the debuginfod service even if no debug session was active. Reviewed-by: Rodrigo Arias Mallo --- flake.lock | 57 +++++++++++++++++++++++++++++++++++++++- flake.nix | 2 ++ m/eudy/configuration.nix | 1 + m/hut/configuration.nix | 1 + m/module/debuginfod.nix | 12 +++++++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 m/module/debuginfod.nix diff --git a/flake.lock b/flake.lock index d807c7d..de52b3c 100644 --- a/flake.lock +++ b/flake.lock @@ -64,6 +64,24 @@ "type": "github" } }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1685518550, + "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, "home-manager": { "inputs": { "nixpkgs": [ @@ -101,11 +119,48 @@ "type": "github" } }, + "nixseparatedebuginfod": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1699185600, + "narHash": "sha256-jxU+5plCvsOJYEdLQTi7sKOOAnndin2sslYOF0Ys66g=", + "owner": "symphorien", + "repo": "nixseparatedebuginfod", + "rev": "232591f5274501b76dbcd83076a57760237fcd64", + "type": "github" + }, + "original": { + "owner": "symphorien", + "repo": 
"nixseparatedebuginfod", + "type": "github" + } + }, "root": { "inputs": { "agenix": "agenix", "bscpkgs": "bscpkgs", - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "nixseparatedebuginfod": "nixseparatedebuginfod" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" } } }, diff --git a/flake.nix b/flake.nix index 807721a..6b19cc5 100644 --- a/flake.nix +++ b/flake.nix @@ -5,6 +5,8 @@ agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; + nixseparatedebuginfod.url = "github:symphorien/nixseparatedebuginfod"; + nixseparatedebuginfod.inputs.nixpkgs.follows = "nixpkgs"; }; outputs = { self, nixpkgs, agenix, bscpkgs, ... }: diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix index c627bf8..7d1f5e1 100644 --- a/m/eudy/configuration.nix +++ b/m/eudy/configuration.nix @@ -10,6 +10,7 @@ ./fs.nix ./users.nix ./slurm.nix + ../module/debuginfod.nix ]; # Select this using the ID to avoid mismatches diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 9187812..d571053 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -5,6 +5,7 @@ ../common/main.nix ../module/ceph.nix + ../module/debuginfod.nix ./gitlab-runner.nix ./monitoring.nix ./nfs.nix diff --git a/m/module/debuginfod.nix b/m/module/debuginfod.nix new file mode 100644 index 0000000..6d3c135 --- /dev/null +++ b/m/module/debuginfod.nix @@ -0,0 +1,12 @@ +{ theFlake, ... 
}: + +let + nixseparatedebuginfod = theFlake.inputs.nixseparatedebuginfod; +in +{ + imports = [ + nixseparatedebuginfod.nixosModules.default + ]; + + services.nixseparatedebuginfod.enable = true; +} -- 2.49.0 From ecbb45d6ac1973fc89227fc6646bf7c9b86b4173 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 1 Dec 2023 12:17:50 +0100 Subject: [PATCH 215/472] Monitor https://pm.bsc.es/gitlab/ too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GitLab instance is in the /gitlab endpoint and may fail independently of https://pm.bsc.es/. Cc: Víctor López Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index b0761c3..1dea2dd 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -106,6 +106,7 @@ static_configs = [{ targets = [ "https://pm.bsc.es/" + "https://pm.bsc.es/gitlab/" "https://jungle.bsc.es/" "https://gitlab.bsc.es/" ]; -- 2.49.0 From 5880a6e5f639790cc2c31d1520375d14b6c2d05c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 7 Dec 2023 11:08:15 +0100 Subject: [PATCH 216/472] Enable public-inbox at jungle.bsc.es/lists The public-inbox service fetches emails from the sourcehut mailing lists and displays them on the web. The idea is to reduce the dependency on external services and add a secondary storage for the mailing lists in case sourcehut goes down or changes the current free plans. The service is available in https://jungle.bsc.es/lists/ and is open to the public. It currently mirrors the bscpkgs and jungle mailing list. We also edited the CSS to improve the readability and have larger fonts by default. The service for public-inbox produced by NixOS is not well configured to fetch emails from an IMAP mail server, so we also manually edit the service file to enable the network. 
Reviewed-by: Aleix Roca Nonell --- m/hut/configuration.nix | 1 + m/hut/public-inbox.css | 79 +++++++++++++++++++++++++++++++++++++++++ m/hut/public-inbox.nix | 47 ++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 m/hut/public-inbox.css create mode 100644 m/hut/public-inbox.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index d571053..5c8e39a 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -11,6 +11,7 @@ ./nfs.nix ./slurm-daemon.nix ./nix-serve.nix + ./public-inbox.nix #./pxe.nix ]; diff --git a/m/hut/public-inbox.css b/m/hut/public-inbox.css new file mode 100644 index 0000000..9d0367c --- /dev/null +++ b/m/hut/public-inbox.css @@ -0,0 +1,79 @@ +/* + * CC0-1.0 + * Dark color scheme using 216 web-safe colors, inspired + * somewhat by the default color scheme in mutt. + * It reduces eyestrain for me, and energy usage for all: + * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme + */ + +* { + font-size: 14px; + font-family: monospace; +} + +pre { + white-space: pre-wrap; + padding: 10px; + background: #f5f5f5; +} + +hr { + margin: 30px 0; +} + +body { + max-width: 120ex; /* 120 columns wide */ + margin: 50px auto; +} + +/* + * Underlined links add visual noise which make them hard-to-read. + * Use colors to make them stand out, instead. + */ +a:link { + color: #007; + text-decoration: none; +} +a:visited { + color:#504; +} +a:hover { + text-decoration: underline; +} + +/* quoted text in emails gets a different color */ +*.q { color:gray } + +/* + * these may be used with cgit , too. + * (cgit uses
, public-inbox uses ) + */ +*.add { color:darkgreen } /* diff post-image lines */ +*.del { color:darkred } /* diff pre-image lines */ +*.head { color:black } /* diff header (metainformation) */ +*.hunk { color:gray } /* diff hunk-header */ + +/* + * highlight 3.x colors (tested 3.18) for displaying blobs. + * This doesn't use most of the colors available, as I find too + * many colors overwhelming, so the default is commented out. + */ +.hl.num { color:#f30 } /* number */ +.hl.esc { color:#f0f } /* escape character */ +.hl.str { color:#f30 } /* string */ +.hl.ppc { color:#f0f } /* preprocessor */ +.hl.pps { color:#f30 } /* preprocessor string */ +.hl.slc { color:#09f } /* single-line comment */ +.hl.com { color:#09f } /* multi-line comment */ +/* .hl.opt { color:#ccc } */ /* operator */ +/* .hl.ipl { color:#ccc } */ /* interpolation */ + +/* keyword groups kw[a-z] */ +.hl.kwa { color:#ff0 } +.hl.kwb { color:#0f0 } +.hl.kwc { color:#ff0 } +/* .hl.kwd { color:#ccc } */ + +/* line-number (unused by public-inbox) */ +/* .hl.lin { color:#ccc } */ + diff --git a/m/hut/public-inbox.nix b/m/hut/public-inbox.nix new file mode 100644 index 0000000..a3d7a67 --- /dev/null +++ b/m/hut/public-inbox.nix @@ -0,0 +1,47 @@ +{ lib, ... 
}: + +{ + services.public-inbox = { + enable = true; + http = { + enable = true; + port = 8081; + mounts = [ "/lists" ]; + }; + settings.publicinbox = { + css = [ "${./public-inbox.css}" ]; + wwwlisting = "all"; + }; + inboxes = { + bscpkgs = { + url = "https://jungle.bsc.es/lists/bscpkgs"; + address = [ "~rodarima/bscpkgs@lists.sr.ht" ]; + watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ]; + description = "Patches for bscpkgs"; + listid = "~rodarima/bscpkgs.lists.sr.ht"; + }; + jungle = { + url = "https://jungle.bsc.es/lists/jungle"; + address = [ "~rodarima/jungle@lists.sr.ht" ]; + watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ]; + description = "Patches for jungle"; + listid = "~rodarima/jungle.lists.sr.ht"; + }; + }; + }; + + # We need access to the network for the watch service, as we will fetch the + # emails directly from the IMAP server. + systemd.services.public-inbox-watch.serviceConfig = { + PrivateNetwork = lib.mkForce false; + RestrictAddressFamilies = lib.mkForce [ "AF_UNIX" "AF_INET" "AF_INET6" ]; + KillSignal = "SIGKILL"; # Avoid slow shutdown + + # Required for chmod(..., 02750) on directories by git, from + # systemd.exec(8): + # > Note that this restricts marking of any type of file system object with + # > these bits, including both regular files and directories (where the SGID + # > is a different meaning than for files, see documentation). 
+ RestrictSUIDSGID = lib.mkForce false; + }; +} -- 2.49.0 From 7f17fe8874dd28307e5ddf8a9c2061d9ac45bb4d Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Fri, 9 Feb 2024 11:14:34 +0100 Subject: [PATCH 217/472] Move slurm client in a separate module Reviewed-by: Rodrigo Arias Mallo --- m/bay/configuration.nix | 4 ---- m/common/main.nix | 1 - m/eudy/configuration.nix | 1 - m/eudy/slurm.nix | 7 ------- m/hut/configuration.nix | 3 ++- m/hut/{slurm-daemon.nix => slurm-server.nix} | 0 m/koro/configuration.nix | 1 - m/lake2/configuration.nix | 4 ---- m/{common/slurm.nix => module/slurm-client.nix} | 0 m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + 11 files changed, 4 insertions(+), 19 deletions(-) delete mode 100644 m/eudy/slurm.nix rename m/hut/{slurm-daemon.nix => slurm-server.nix} (100%) rename m/{common/slurm.nix => module/slurm-client.nix} (100%) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 5e2b342..70b7165 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -13,10 +13,6 @@ ceph ]; - services.slurm = { - client.enable = lib.mkForce false; - }; - networking = { hostName = "bay"; interfaces.eno1.ipv4.addresses = [ { diff --git a/m/common/main.nix b/m/common/main.nix index 2844b3f..4a4671d 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -8,7 +8,6 @@ ./hw.nix ./net.nix ./ntp.nix - ./slurm.nix ./ssh.nix ./users.nix ./watchdog.nix diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix index 7d1f5e1..b0889d8 100644 --- a/m/eudy/configuration.nix +++ b/m/eudy/configuration.nix @@ -9,7 +9,6 @@ ./cpufreq.nix ./fs.nix ./users.nix - ./slurm.nix ../module/debuginfod.nix ]; diff --git a/m/eudy/slurm.nix b/m/eudy/slurm.nix deleted file mode 100644 index 0aa3bda..0000000 --- a/m/eudy/slurm.nix +++ /dev/null @@ -1,7 +0,0 @@ -{ lib, ... 
}: - -{ - services.slurm = { - client.enable = lib.mkForce false; - }; -} diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 5c8e39a..4014b57 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -6,10 +6,11 @@ ../module/ceph.nix ../module/debuginfod.nix + ../module/slurm-client.nix ./gitlab-runner.nix ./monitoring.nix ./nfs.nix - ./slurm-daemon.nix + ./slurm-server.nix ./nix-serve.nix ./public-inbox.nix #./pxe.nix diff --git a/m/hut/slurm-daemon.nix b/m/hut/slurm-server.nix similarity index 100% rename from m/hut/slurm-daemon.nix rename to m/hut/slurm-server.nix diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix index 6623549..9c92ef3 100644 --- a/m/koro/configuration.nix +++ b/m/koro/configuration.nix @@ -7,7 +7,6 @@ ../eudy/cpufreq.nix ../eudy/users.nix - ../eudy/slurm.nix ./users.nix ./kernel.nix ]; diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 58bfef8..aacf5b9 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -12,10 +12,6 @@ ceph ]; - services.slurm = { - client.enable = lib.mkForce false; - }; - services.ceph = { enable = true; global = { diff --git a/m/common/slurm.nix b/m/module/slurm-client.nix similarity index 100% rename from m/common/slurm.nix rename to m/module/slurm-client.nix diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index d21ccfd..b208139 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/main.nix ../module/ceph.nix + ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix ]; diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 5fdabbe..fac678b 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/main.nix ../module/ceph.nix + ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix ]; -- 2.49.0 From 
1df80460d2f8e2c5b2bdc43466daa4207f385ab6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 13 Feb 2024 11:50:38 +0100 Subject: [PATCH 218/472] Add another HTTPS probe for bsc.es As all other HTTPS probes pass through the opsproxy01.bsc.es proxy, we cannot detect a problem in our proxy or in the BSC one. Adding another target like bsc.es that doesn't use the ops proxy allows us to discern where the problem lies. Instead of monitoring https://www.bsc.es/ directly, which will trigger the whole Drupal server and take a whole second, we just fetch robots.txt so the overhead on the server is minimal (and returns in less than 10 ms). Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 1dea2dd..bbcd48f 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -105,6 +105,7 @@ params = { module = [ "http_2xx" ]; }; static_configs = [{ targets = [ + "https://www.bsc.es/robots.txt" "https://pm.bsc.es/" "https://pm.bsc.es/gitlab/" "https://jungle.bsc.es/" -- 2.49.0 From 82ccae1315c4dfadf36b1d17323047cce4bfaa55 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 29 Feb 2024 09:57:18 +0100 Subject: [PATCH 219/472] Use google.com probe instead of bsc.es The main website of the BSC is failing every day around 3:00 AM for almost one hour, so it is not a very good target. Instead, google.com is used which should be more reliable. The same robots.txt path is fetched, as it is smaller than the main page. 
Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index bbcd48f..e55c9a1 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -105,7 +105,7 @@ params = { module = [ "http_2xx" ]; }; static_configs = [{ targets = [ - "https://www.bsc.es/robots.txt" + "https://www.google.com/robots.txt" "https://pm.bsc.es/" "https://pm.bsc.es/gitlab/" "https://jungle.bsc.es/" -- 2.49.0 From 9f1cd0214474dfa2a2c9a3881166e258cd70036a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Mar 2024 16:41:30 +0100 Subject: [PATCH 220/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'agenix': 'github:ryantm/agenix/daf42cb35b2dc614d1551e37f96406e4c4a2d3e4' (2023-10-08) → 'github:ryantm/agenix/1381a759b205dff7a6818733118d02253340fd5e' (2024-04-02) • Updated input 'agenix/darwin': 'github:lnl7/nix-darwin/87b9d090ad39b25b2400029c64825fc2a8868943' (2023-01-09) → 'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d' (2023-11-24) • Updated input 'agenix/home-manager': 'github:nix-community/home-manager/32d3e39c491e2f91152c84f8ad8b003420eab0a1' (2023-04-22) → 'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1' (2023-12-20) • Added input 'agenix/systems': 'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e' (2023-04-09) • Updated input 'bscpkgs': 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=e148de50d68b3eeafc3389b331cf042075971c4b' (2023-11-22) → 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=de89197a4a7b162db7df9d41c9d07759d87c5709' (2024-04-24) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/e4ad989506ec7d71f7302cc3067abd82730a4beb' (2023-11-19) → 'github:NixOS/nixpkgs/6143fc5eeb9c4f00163267708e26191d1e918932' (2024-04-21) • Updated input 'nixseparatedebuginfod': 
'github:symphorien/nixseparatedebuginfod/232591f5274501b76dbcd83076a57760237fcd64' (2023-11-05) → 'github:symphorien/nixseparatedebuginfod/98d79461660f595637fa710d59a654f242b4c3f7' (2024-03-07) • Removed input 'nixseparatedebuginfod' • Removed input 'nixseparatedebuginfod/flake-utils' • Removed input 'nixseparatedebuginfod/flake-utils/systems' • Removed input 'nixseparatedebuginfod/nixpkgs' Reviewed-by: Aleix Roca Nonell --- flake.lock | 77 ++++++++++++++---------------------------------------- 1 file changed, 19 insertions(+), 58 deletions(-) diff --git a/flake.lock b/flake.lock index de52b3c..5139cc1 100644 --- a/flake.lock +++ b/flake.lock @@ -6,14 +6,15 @@ "home-manager": "home-manager", "nixpkgs": [ "nixpkgs" - ] + ], + "systems": "systems" }, "locked": { - "lastModified": 1696775529, - "narHash": "sha256-TYlE4B0ktPtlJJF9IFxTWrEeq+XKG8Ny0gc2FGEAdj0=", + "lastModified": 1712079060, + "narHash": "sha256-/JdiT9t+zzjChc5qQiF+jhrVhRt8figYH29rZO7pFe4=", "owner": "ryantm", "repo": "agenix", - "rev": "daf42cb35b2dc614d1551e37f96406e4c4a2d3e4", + "rev": "1381a759b205dff7a6818733118d02253340fd5e", "type": "github" }, "original": { @@ -29,11 +30,11 @@ ] }, "locked": { - "lastModified": 1700663915, - "narHash": "sha256-2NiVhUgOs26nVqnypx8Er3sjyHD2cApjYV9iYrGY9xE=", + "lastModified": 1713974364, + "narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=", "ref": "refs/heads/master", - "rev": "e148de50d68b3eeafc3389b331cf042075971c4b", - "revCount": 912, + "rev": "de89197a4a7b162db7df9d41c9d07759d87c5709", + "revCount": 937, "type": "git", "url": "https://git.sr.ht/~rodarima/bscpkgs" }, @@ -50,11 +51,11 @@ ] }, "locked": { - "lastModified": 1673295039, - "narHash": "sha256-AsdYgE8/GPwcelGgrntlijMg4t3hLFJFCRF3tL5WVjA=", + "lastModified": 1700795494, + "narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=", "owner": "lnl7", "repo": "nix-darwin", - "rev": "87b9d090ad39b25b2400029c64825fc2a8868943", + "rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d", "type": 
"github" }, "original": { @@ -64,24 +65,6 @@ "type": "github" } }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1685518550, - "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "home-manager": { "inputs": { "nixpkgs": [ @@ -90,11 +73,11 @@ ] }, "locked": { - "lastModified": 1682203081, - "narHash": "sha256-kRL4ejWDhi0zph/FpebFYhzqlOBrk0Pl3dzGEKSAlEw=", + "lastModified": 1703113217, + "narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=", "owner": "nix-community", "repo": "home-manager", - "rev": "32d3e39c491e2f91152c84f8ad8b003420eab0a1", + "rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1", "type": "github" }, "original": { @@ -105,11 +88,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1700390070, - "narHash": "sha256-de9KYi8rSJpqvBfNwscWdalIJXPo8NjdIZcEJum1mH0=", + "lastModified": 1713714899, + "narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "e4ad989506ec7d71f7302cc3067abd82730a4beb", + "rev": "6143fc5eeb9c4f00163267708e26191d1e918932", "type": "github" }, "original": { @@ -119,33 +102,11 @@ "type": "github" } }, - "nixseparatedebuginfod": { - "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1699185600, - "narHash": "sha256-jxU+5plCvsOJYEdLQTi7sKOOAnndin2sslYOF0Ys66g=", - "owner": "symphorien", - "repo": "nixseparatedebuginfod", - "rev": "232591f5274501b76dbcd83076a57760237fcd64", - "type": "github" - }, - "original": { - "owner": "symphorien", - "repo": "nixseparatedebuginfod", - "type": "github" - } - }, "root": { "inputs": { "agenix": "agenix", "bscpkgs": "bscpkgs", - "nixpkgs": "nixpkgs", - "nixseparatedebuginfod": "nixseparatedebuginfod" + 
"nixpkgs": "nixpkgs" } }, "systems": { -- 2.49.0 From 366436b6d3b484559455b851f4501edff0ec3807 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Mar 2024 16:44:21 +0100 Subject: [PATCH 221/472] Remove nixseparatedebuginfod input It has been integrated in nixpkgs, so is no longer required. Reviewed-by: Aleix Roca Nonell --- flake.nix | 2 -- m/module/debuginfod.nix | 9 --------- 2 files changed, 11 deletions(-) diff --git a/flake.nix b/flake.nix index 6b19cc5..807721a 100644 --- a/flake.nix +++ b/flake.nix @@ -5,8 +5,6 @@ agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; - nixseparatedebuginfod.url = "github:symphorien/nixseparatedebuginfod"; - nixseparatedebuginfod.inputs.nixpkgs.follows = "nixpkgs"; }; outputs = { self, nixpkgs, agenix, bscpkgs, ... }: diff --git a/m/module/debuginfod.nix b/m/module/debuginfod.nix index 6d3c135..a7dc05d 100644 --- a/m/module/debuginfod.nix +++ b/m/module/debuginfod.nix @@ -1,12 +1,3 @@ -{ theFlake, ... }: - -let - nixseparatedebuginfod = theFlake.inputs.nixseparatedebuginfod; -in { - imports = [ - nixseparatedebuginfod.nixosModules.default - ]; - services.nixseparatedebuginfod.enable = true; } -- 2.49.0 From 30f2079f0b28672107898e4564842aae571c9901 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Mar 2024 16:59:11 +0100 Subject: [PATCH 222/472] Merge pmix outputs for MPICH MPICH expects headers and libraries to be present in the same directory. 
Reviewed-by: Aleix Roca Nonell --- pkgs/overlay.nix | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index f31a598..34e56fb 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -4,10 +4,16 @@ final: prev: mpi = final.mpich; # Configure the network for MPICH - mpich = with final; prev.mpich.overrideAttrs (old: { + mpich = with final; let + # pmix comes with the libraries in .out and headers in .dev + pmixAll = symlinkJoin { + name = "pmix-all"; + paths = [ pmix.dev pmix.out ]; + }; + in prev.mpich.overrideAttrs (old: { buildInput = old.buildInputs ++ [ libfabric - pmix + pmixAll ]; configureFlags = [ "--enable-shared" @@ -15,8 +21,8 @@ final: prev: "--with-pm=no" "--with-device=ch4:ofi" "--with-pmi=pmix" - "--with-pmix=${final.pmix}" - "--with-libfabric=${final.libfabric}" + "--with-pmix=${pmixAll}" + "--with-libfabric=${libfabric}" "--enable-g=log" ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 -- 2.49.0 From 2b26cd2f463911c580f9b146afe77c416efc75f5 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 15 Mar 2024 13:12:46 +0100 Subject: [PATCH 223/472] Fix SLURM bug in rank integer sign expansion See: https://bugs.schedmd.com/show_bug.cgi?id=19324 Reviewed-by: Aleix Roca Nonell --- pkgs/overlay.nix | 7 +++++++ pkgs/slurm-rank-expansion.patch | 11 +++++++++++ 2 files changed, 18 insertions(+) create mode 100644 pkgs/slurm-rank-expansion.patch diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 34e56fb..e354fd8 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -30,5 +30,12 @@ final: prev: ]; }); + slurm = prev.slurm.overrideAttrs (old: { + patches = (old.patches or []) ++ [ + # See https://bugs.schedmd.com/show_bug.cgi?id=19324 + ./slurm-rank-expansion.patch + ]; + }); + prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; } diff --git 
a/pkgs/slurm-rank-expansion.patch b/pkgs/slurm-rank-expansion.patch new file mode 100644 index 0000000..4666d76 --- /dev/null +++ b/pkgs/slurm-rank-expansion.patch @@ -0,0 +1,11 @@ +--- a/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:05:24.815313882 +0100 ++++ b/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:09:53.936900823 +0100 +@@ -314,7 +314,7 @@ static void _dmdx_req(buf_t *buf, int no + } + + nsptr = pmixp_nspaces_local(); +- if (nsptr->ntasks <= rank) { ++ if ((long) nsptr->ntasks <= (long) rank) { + char *nodename = pmixp_info_job_host(nodeid); + PMIXP_ERROR("Bad request from %s: nspace \"%s\" has only %d ranks, asked for %d", + nodename, ns, nsptr->ntasks, rank); -- 2.49.0 From 3863fc25a52df5611a95c976bfcec6c4b893ebcc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 15 Mar 2024 21:39:43 +0100 Subject: [PATCH 224/472] Add workaround for MPICH 4.2.0 See: https://github.com/pmodels/mpich/issues/6946 Reviewed-by: Aleix Roca Nonell --- pkgs/mpich-fix-hwtopo.patch | 36 ++++++++++++++++++++++++++++++++++++ pkgs/overlay.nix | 4 ++++ 2 files changed, 40 insertions(+) create mode 100644 pkgs/mpich-fix-hwtopo.patch diff --git a/pkgs/mpich-fix-hwtopo.patch b/pkgs/mpich-fix-hwtopo.patch new file mode 100644 index 0000000..11146d5 --- /dev/null +++ b/pkgs/mpich-fix-hwtopo.patch @@ -0,0 +1,36 @@ +diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c +index 33e88bc..ee3641c 100644 +--- a/src/util/mpir_hwtopo.c ++++ b/src/util/mpir_hwtopo.c +@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void) + #ifdef HAVE_HWLOC + bindset = hwloc_bitmap_alloc(); + hwloc_topology_init(&hwloc_topology); +- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile"); +- if (xmlfile != NULL) { +- int rc; +- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile); +- if (rc == 0) { +- /* To have hwloc still actually call OS-specific hooks, the +- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded +- * file is really the underlying system. 
*/ +- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM); +- } +- MPL_free(xmlfile); +- } + + hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL); + if (!hwloc_topology_load(hwloc_topology)) + +--- a/src/mpi/init/local_proc_attrs.c ++++ b/src/mpi/init/local_proc_attrs.c +@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required) + /* Set the number of tag bits. The device may override this value. */ + MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT; + +- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds"); +- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds); +- MPL_free(requested_kinds); +- + return mpi_errno; + } diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index e354fd8..a36961b 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -11,6 +11,10 @@ final: prev: paths = [ pmix.dev pmix.out ]; }; in prev.mpich.overrideAttrs (old: { + patches = [ + # See https://github.com/pmodels/mpich/issues/6946 + ./mpich-fix-hwtopo.patch + ]; buildInput = old.buildInputs ++ [ libfabric pmixAll -- 2.49.0 From c8160122b3819049c0b5ca4c5a43c5ff583ff44f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 24 Apr 2024 16:55:06 +0200 Subject: [PATCH 225/472] Add firewall rules for Ceph and monitoring The firewall was blocking the monitoring traffic from hut and the Ceph traffic among OSDs. The rules only allow connecting from the specific host that they are supposed to be coming from. 
Reviewed-by: Aleix Roca Nonell --- m/bay/configuration.nix | 10 ++++++++++ m/lake2/configuration.nix | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 70b7165..ac34f69 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -23,6 +23,16 @@ address = "10.0.42.40"; prefixLength = 24; } ]; + firewall = { + extraCommands = '' + # Accept all incoming TCP traffic from lake2 + iptables -A nixos-fw -p tcp -s lake2 -j nixos-fw-accept + # Accept monitoring requests from hut + iptables -A nixos-fw -p tcp -s hut -m multiport --dport 9283,9002 -j nixos-fw-accept + # Accept all Ceph traffic from the local network + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept + ''; + }; }; services.ceph = { diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index aacf5b9..934f6f7 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -45,6 +45,14 @@ address = "10.0.42.42"; prefixLength = 24; } ]; + firewall = { + extraCommands = '' + # Accept all incoming TCP traffic from bay + iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept + # Accept monitoring requests from hut + iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept + ''; + }; }; # Missing service for volumes, see: -- 2.49.0 From 432e6c8521cf8941dae928e7d1d17bb9ebecb3d8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 26 Apr 2024 16:52:52 +0200 Subject: [PATCH 226/472] Add Gitea service Reviewed-by: Aleix Roca Nonell --- m/hut/configuration.nix | 1 + m/hut/gitea.nix | 51 ++++++++++++++++++++++++++++++++++ secrets/gitea-runner-token.age | 9 ++++++ secrets/secrets.nix | 1 + 4 files changed, 62 insertions(+) create mode 100644 m/hut/gitea.nix create mode 100644 secrets/gitea-runner-token.age diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 4014b57..09fba85 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix 
@@ -13,6 +13,7 @@ ./slurm-server.nix ./nix-serve.nix ./public-inbox.nix + ./gitea.nix #./pxe.nix ]; diff --git a/m/hut/gitea.nix b/m/hut/gitea.nix new file mode 100644 index 0000000..651e2c8 --- /dev/null +++ b/m/hut/gitea.nix @@ -0,0 +1,51 @@ +{ config, lib, ... }: +{ + age.secrets.giteaRunnerToken.file = ../../secrets/gitea-runner-token.age; + + services.gitea = { + enable = true; + appName = "Gitea in the jungle"; + + settings = { + server = { + ROOT_URL = "https://jungle.bsc.es/git/"; + LOCAL_ROOT_URL = "https://jungle.bsc.es/git/"; + LANDING_PAGE = "explore"; + }; + metrics.ENABLED = true; + service.REGISTER_MANUAL_CONFIRM = true; + }; + }; + + services.gitea-actions-runner.instances = { + runrun = { + enable = true; + name = "runrun"; + url = "https://jungle.bsc.es/git/"; + tokenFile = config.age.secrets.giteaRunnerToken.path; + labels = [ "native:host" ]; + settings.runner.capacity = 8; + }; + }; + + systemd.services.gitea-runner-runrun = { + path = [ "/run/current-system/sw" ]; + serviceConfig = { + # DynamicUser doesn't work well with SSH + DynamicUser = lib.mkForce false; + User = "gitea-runner"; + Group = "gitea-runner"; + }; + }; + + users.users.gitea-runner = { + isSystemUser = true; + home = "/var/lib/gitea-runner"; + description = "Gitea Runner"; + group = "gitea-runner"; + extraGroups = [ "docker" ]; + createHome = true; + }; + users.groups.gitea-runner = {}; +} + diff --git a/secrets/gitea-runner-token.age b/secrets/gitea-runner-token.age new file mode 100644 index 0000000..1c799cf --- /dev/null +++ b/secrets/gitea-runner-token.age @@ -0,0 +1,9 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA +Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs +-> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8 +7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw +-> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I +GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU +--- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8 
+1Mw4 :H@/gLtM,ƥ*zNV5mNoj1 $TG_E{%1ǯHAp \ No newline at end of file diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 991cdd5..debaccc 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -7,6 +7,7 @@ let in { "gitlab-bsc-es-token.age".publicKeys = hut; + "gitea-runner-token.age".publicKeys = hut; "ovni-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; -- 2.49.0 From ba60e121df1d882389d8afa156dc0b7e73b63177 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 29 Apr 2024 11:22:45 +0200 Subject: [PATCH 227/472] Collect Gitea metrics in Prometheus Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index e55c9a1..b08730f 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -160,6 +160,10 @@ } ]; } + { + job_name = "gitea"; + static_configs = [{ targets = [ "127.0.0.1:3000" ]; }]; + } { # Scrape the IPMI info of the hosts remotely via LAN job_name = "ipmi-lan"; -- 2.49.0 From 81b680a7d27b143e42ad83418eb7b4f229dc57bb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 30 Apr 2024 13:04:45 +0200 Subject: [PATCH 228/472] Allow Ceph traffic to lake2 --- m/lake2/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 934f6f7..8e19a12 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -51,6 +51,8 @@ iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept # Accept monitoring requests from hut iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept + # Accept all Ceph traffic from the local network + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept ''; }; }; -- 2.49.0 From dbd95dd7b82943d3d42b31a14271122f67edda09 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 2 May 2024 17:54:09 +0200 Subject: [PATCH 
229/472] Add msmtp to send notifications via email Reviewed-by: Aleix Roca Nonell --- m/hut/configuration.nix | 1 + m/hut/msmtp.nix | 24 ++++++++++++++++++++++++ secrets/jungle-robot-password.age | 10 ++++++++++ secrets/secrets.nix | 1 + 4 files changed, 36 insertions(+) create mode 100644 m/hut/msmtp.nix create mode 100644 secrets/jungle-robot-password.age diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 09fba85..c7ec747 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -14,6 +14,7 @@ ./nix-serve.nix ./public-inbox.nix ./gitea.nix + ./msmtp.nix #./pxe.nix ]; diff --git a/m/hut/msmtp.nix b/m/hut/msmtp.nix new file mode 100644 index 0000000..aaeaf5d --- /dev/null +++ b/m/hut/msmtp.nix @@ -0,0 +1,24 @@ +{ config, lib, ... }: +{ + age.secrets.jungleRobotPassword = { + file = ../../secrets/jungle-robot-password.age; + group = "gitea"; + mode = "440"; + }; + + programs.msmtp = { + enable = true; + accounts = { + default = { + auth = true; + tls = true; + tls_starttls = false; + port = 465; + host = "mail.bsc.es"; + user = "jungle-robot"; + passwordeval = "cat ${config.age.secrets.jungleRobotPassword.path}"; + from = "jungle-robot@bsc.es"; + }; + }; + }; +} diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age new file mode 100644 index 0000000..de9bf22 --- /dev/null +++ b/secrets/jungle-robot-password.age @@ -0,0 +1,10 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg 3L1Y5upc5qN6fgiFAox5rD/W8n0eQUv5mT39QAdO5Ac +XkWsmPmzRgHjsvJgsDKJRgHZ7/sBZFmd1Doppj/y390 +-> ssh-ed25519 CAWG4Q v03Qr+fckdIpsxvQG/viKxlF8WNpO4XUe//QcPzH4k0 +afUwi3ccDCRfUxPDdF7ZkoL+0UX1XwqVtiyabDWjVQk +-> ssh-ed25519 MSF3dg c2hEUk4LslJpiL7v/4UpT8fK7ZiBJ8+uRhZ/vBoRUDE +YX9EpnJpHo1eDsZtapTVY6jD+81kb588Oik4NoY9jro +--- LhUkopNtCsyHCLzEYzBFs+vekOkAR4B3VBaiMF/ZF8w +o˝CHyLؔItMIױsM\1-KG: +gbpFӶ%Y \ No newline at end of file diff --git a/secrets/secrets.nix b/secrets/secrets.nix index debaccc..9844734 100644 --- a/secrets/secrets.nix +++ 
b/secrets/secrets.nix @@ -11,6 +11,7 @@ in "ovni-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; + "jungle-robot-password.age".publicKeys = hut; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; -- 2.49.0 From 1189626a6f347abdc07b0f674da77af650693b33 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 2 May 2024 18:54:38 +0200 Subject: [PATCH 230/472] Enable mail notification in Gitea Reviewed-by: Aleix Roca Nonell --- m/hut/gitea.nix | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/m/hut/gitea.nix b/m/hut/gitea.nix index 651e2c8..22a494b 100644 --- a/m/hut/gitea.nix +++ b/m/hut/gitea.nix @@ -13,7 +13,18 @@ LANDING_PAGE = "explore"; }; metrics.ENABLED = true; - service.REGISTER_MANUAL_CONFIRM = true; + service = { + REGISTER_MANUAL_CONFIRM = true; + ENABLE_NOTIFY_MAIL = true; + }; + + mailer = { + ENABLED = true; + FROM = "jungle-robot@bsc.es"; + PROTOCOL = "sendmail"; + SENDMAIL_PATH = "/run/wrappers/bin/sendmail"; + SENDMAIL_ARGS = "--"; + }; }; }; -- 2.49.0 From b8b85f55cd210f180c105a131bf80ee885ed13c1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 31 May 2024 13:54:06 +0200 Subject: [PATCH 231/472] Enable Grafana email alerts Allows sending Grafana alerts via email too, so we have a redundant mechanism in case Slack fails to deliver them.
Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index b08730f..8f0d5fa 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -3,6 +3,12 @@ { imports = [ ../module/slurm-exporter.nix ]; + age.secrets.grafanaJungleRobotPassword = { + file = ../../secrets/jungle-robot-password.age; + owner = "grafana"; + mode = "400"; + }; + services.grafana = { enable = true; settings = { @@ -13,6 +19,16 @@ http_port = 2342; http_addr = "127.0.0.1"; }; + smtp = { + enabled = true; + from_address = "jungle-robot@bsc.es"; + user = "jungle-robot"; + # Read the password from a file, which is only readable by grafana user + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider + password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}"; + host = "mail.bsc.es:465"; + startTLS_policy = "NoStartTLS"; + }; feature_toggles.publicDashboards = true; "auth.anonymous".enabled = true; }; -- 2.49.0 From b1ce302e4b08f7ee9bba46caadb2874b53f4dcee Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 30 May 2024 13:35:58 +0200 Subject: [PATCH 232/472] Add PostgreSQL DB for performance test results The database will hold the performance results of the execution of the benchmarks. We follow the same setup on knights3 for now. Reviewed-by: Aleix Roca Nonell --- m/hut/configuration.nix | 1 + m/hut/postgresql.nix | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 m/hut/postgresql.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index c7ec747..aa438c0 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -15,6 +15,7 @@ ./public-inbox.nix ./gitea.nix ./msmtp.nix + ./postgresql.nix #./pxe.nix ]; diff --git a/m/hut/postgresql.nix b/m/hut/postgresql.nix new file mode 100644 index 0000000..fc86d7a --- /dev/null +++ b/m/hut/postgresql.nix @@ -0,0 +1,19 @@ +{ lib, ... 
}: + +{ + services.postgresql = { + enable = true; + ensureDatabases = [ "perftestsdb" ]; + ensureUsers = [ + { name = "anavarro"; ensureClauses.superuser = true; } + { name = "rarias"; ensureClauses.superuser = true; } + { name = "grafana"; } + ]; + authentication = '' + #type database DBuser auth-method + local perftestsdb rarias trust + local perftestsdb anavarro trust + local perftestsdb grafana trust + ''; + }; +} -- 2.49.0 From 15b4b28d2cb0b95c4933501a046a9a075bc326ef Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 6 Jun 2024 14:06:33 +0200 Subject: [PATCH 233/472] Control user access to each machine The users.jungleUsers configuration option behaves like the users.users option, but defines the list attribute `hosts` for each user, which filters users so that each user can only access the hosts listed there. Reviewed-by: Aleix Roca Nonell --- m/common/users.nix | 8 ++++++++ m/module/jungle-users.nix | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 m/module/jungle-users.nix diff --git a/m/common/users.nix b/m/common/users.nix index 8451196..02680fc 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -1,6 +1,10 @@ { pkgs, ...
}: { + imports = [ + ../module/jungle-users.nix + ]; + users = { mutableUsers = false; users = { @@ -42,13 +46,16 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" ]; }; + }; + jungleUsers = { rpenacob = { uid = 2761; isNormalUser = true; home = "/home/Computational/rpenacob"; description = "Raúl Peñacoba"; group = "Computational"; + hosts = [ "hut" ]; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" @@ -61,6 +68,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; + hosts = [ "hut" "raccoon" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" diff --git a/m/module/jungle-users.nix b/m/module/jungle-users.nix new file mode 100644 index 0000000..9601d29 --- /dev/null +++ b/m/module/jungle-users.nix @@ -0,0 +1,24 @@ +{ config, lib, ... }: + +with lib; + +{ + options = { + users.jungleUsers = mkOption { + type = types.attrsOf (types.anything // { check = (x: x ? "hosts"); }); + description = '' + Same as users.users but with the extra `hosts` attribute, which controls + access to the nodes by `networking.hostName`. 
+ ''; + }; + }; + + config = let + allowedUser = host: userConf: builtins.elem host userConf.hosts; + filterUsers = host: users: filterAttrs (n: v: allowedUser host v) users; + removeHosts = users: mapAttrs (n: v: builtins.removeAttrs v [ "hosts" ]) users; + currentHost = config.networking.hostName; + in { + users.users = removeHosts (filterUsers currentHost config.users.jungleUsers); + }; +} -- 2.49.0 From 24ee74d6141bf7dc9b16f1fb0502c43ea3ba1eb4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 3 Jun 2024 09:20:11 +0200 Subject: [PATCH 234/472] Split xeon specific configuration from base To accommodate the raccoon knights workstation, some of the configuration pulled by m/common/main.nix has to be removed. To solve it, the xeon specific parts are placed into m/common/xeon.nix and only the common configuration is at m/common/base.nix. Reviewed-by: Aleix Roca Nonell --- doc/install.md | 10 +++ flake.nix | 15 ++--- m/bay/configuration.nix | 4 +- m/common/base.nix | 19 ++++++ m/common/{ => base}/agenix.nix | 0 m/common/{ => base}/boot.nix | 2 +- m/common/base/env.nix | 35 +++++++++++ m/common/{ => base}/fs.nix | 7 --- m/common/{ => base}/hw.nix | 0 m/common/base/net.nix | 19 ++++++ m/common/base/nix.nix | 42 +++++++++++++ m/common/{ => base}/ntp.nix | 0 m/common/{ => base}/rev.nix | 3 + m/common/{ => base}/ssh.nix | 2 +- m/common/{ => base}/users.nix | 2 +- m/common/{ => base}/watchdog.nix | 0 m/common/{ => base}/zsh.nix | 0 m/common/main.nix | 96 ----------------------------- m/common/xeon.nix | 9 +++ m/common/xeon/fs.nix | 8 +++ m/common/xeon/getty.nix | 8 +++ m/common/{ => xeon}/net.nix | 8 +-- m/eudy/configuration.nix | 2 +- m/hut/configuration.nix | 2 +- m/koro/configuration.nix | 2 +- m/lake2/configuration.nix | 4 +- m/{common => module}/monitoring.nix | 0 m/owl1/configuration.nix | 2 +- m/owl2/configuration.nix | 2 +- m/raccoon/configuration.nix | 32 ++++++++++ 30 files changed, 207 insertions(+), 128 deletions(-) create mode 100644
m/common/base.nix rename m/common/{ => base}/agenix.nix (100%) rename m/common/{ => base}/boot.nix (95%) create mode 100644 m/common/base/env.nix rename m/common/{ => base}/fs.nix (68%) rename m/common/{ => base}/hw.nix (100%) create mode 100644 m/common/base/net.nix create mode 100644 m/common/base/nix.nix rename m/common/{ => base}/ntp.nix (100%) rename m/common/{ => base}/rev.nix (87%) rename m/common/{ => base}/ssh.nix (95%) rename m/common/{ => base}/users.nix (98%) rename m/common/{ => base}/watchdog.nix (100%) rename m/common/{ => base}/zsh.nix (100%) delete mode 100644 m/common/main.nix create mode 100644 m/common/xeon.nix create mode 100644 m/common/xeon/fs.nix create mode 100644 m/common/xeon/getty.nix rename m/common/{ => xeon}/net.nix (94%) rename m/{common => module}/monitoring.nix (100%) create mode 100644 m/raccoon/configuration.nix diff --git a/doc/install.md b/doc/install.md index d5e279d..66a66c9 100644 --- a/doc/install.md +++ b/doc/install.md @@ -150,3 +150,13 @@ And update grub. 
``` # nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v ``` + +## Chain NixOS in same disk + +``` +menuentry 'NixOS' { + insmod chain + set root=(hd3,1) + configfile /boot/grub/grub.cfg +} +``` diff --git a/flake.nix b/flake.nix index 807721a..9ff6538 100644 --- a/flake.nix +++ b/flake.nix @@ -17,13 +17,14 @@ let in { nixosConfigurations = { - hut = mkConf "hut"; - owl1 = mkConf "owl1"; - owl2 = mkConf "owl2"; - eudy = mkConf "eudy"; - koro = mkConf "koro"; - bay = mkConf "bay"; - lake2 = mkConf "lake2"; + hut = mkConf "hut"; + owl1 = mkConf "owl1"; + owl2 = mkConf "owl2"; + eudy = mkConf "eudy"; + koro = mkConf "koro"; + bay = mkConf "bay"; + lake2 = mkConf "lake2"; + raccoon = mkConf "raccoon"; }; packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index ac34f69..6aabe6b 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -2,8 +2,8 @@ { imports = [ - ../common/main.nix - ../common/monitoring.nix + ../common/xeon.nix + ../module/monitoring.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/common/base.nix b/m/common/base.nix new file mode 100644 index 0000000..f6b74ea --- /dev/null +++ b/m/common/base.nix @@ -0,0 +1,19 @@ +{ + # All machines should include this profile. + # Includes the basic configuration for an Intel server. 
+ imports = [ + ./base/agenix.nix + ./base/boot.nix + ./base/env.nix + ./base/fs.nix + ./base/hw.nix + ./base/net.nix + ./base/nix.nix + ./base/ntp.nix + ./base/rev.nix + ./base/ssh.nix + ./base/users.nix + ./base/watchdog.nix + ./base/zsh.nix + ]; +} diff --git a/m/common/agenix.nix b/m/common/base/agenix.nix similarity index 100% rename from m/common/agenix.nix rename to m/common/base/agenix.nix diff --git a/m/common/boot.nix b/m/common/base/boot.nix similarity index 95% rename from m/common/boot.nix rename to m/common/base/boot.nix index 8b71901..a3408ab 100644 --- a/m/common/boot.nix +++ b/m/common/base/boot.nix @@ -2,7 +2,7 @@ { # Use the GRUB 2 boot loader. - boot.loader.grub.enable = lib.mkForce true; + boot.loader.grub.enable = true; # Enable GRUB2 serial console boot.loader.grub.extraConfig = '' diff --git a/m/common/base/env.nix b/m/common/base/env.nix new file mode 100644 index 0000000..98943ed --- /dev/null +++ b/m/common/base/env.nix @@ -0,0 +1,35 @@ +{ pkgs, config, ... }: + +{ + environment.systemPackages = with pkgs; [ + vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option + nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree + ncdu config.boot.kernelPackages.perf ldns + # From bsckgs overlay + osumb + ]; + + programs.direnv.enable = true; + + # Increase limits + security.pam.loginLimits = [ + { + domain = "*"; + type = "-"; + item = "memlock"; + value = "1048576"; # 1 GiB of mem locked + } + ]; + + environment.variables = { + EDITOR = "vim"; + VISUAL = "vim"; + }; + + programs.bash.promptInit = '' + PS1="\h\\$ " + ''; + + time.timeZone = "Europe/Madrid"; + i18n.defaultLocale = "en_DK.UTF-8"; +} diff --git a/m/common/fs.nix b/m/common/base/fs.nix similarity index 68% rename from m/common/fs.nix rename to m/common/base/fs.nix index c6fea28..0c785b9 100644 --- a/m/common/fs.nix +++ b/m/common/base/fs.nix @@ -13,13 +13,6 @@ [ { device = "/dev/disk/by-label/swap"; } ]; - # Mount the home via NFS - 
fileSystems."/home" = { - device = "10.0.40.30:/home"; - fsType = "nfs"; - options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; - }; - # Tracing fileSystems."/sys/kernel/tracing" = { device = "none"; diff --git a/m/common/hw.nix b/m/common/base/hw.nix similarity index 100% rename from m/common/hw.nix rename to m/common/base/hw.nix diff --git a/m/common/base/net.nix b/m/common/base/net.nix new file mode 100644 index 0000000..e49d204 --- /dev/null +++ b/m/common/base/net.nix @@ -0,0 +1,19 @@ +{ pkgs, ... }: + +{ + networking = { + enableIPv6 = false; + useDHCP = false; + + firewall = { + enable = true; + allowedTCPPorts = [ 22 ]; + }; + + hosts = { + "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; + "84.88.51.152" = [ "raccoon" ]; + "84.88.51.142" = [ "raccoon-ipmi" ]; + }; + }; +} diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix new file mode 100644 index 0000000..aef1397 --- /dev/null +++ b/m/common/base/nix.nix @@ -0,0 +1,42 @@ +{ pkgs, nixpkgs, bscpkgs, theFlake, ... }: + +{ + nixpkgs.overlays = [ + bscpkgs.bscOverlay + (import ../../../pkgs/overlay.nix) + ]; + + nix = { + nixPath = [ + "nixpkgs=${nixpkgs}" + "jungle=${theFlake.outPath}" + ]; + + registry = { + nixpkgs.flake = nixpkgs; + jungle.flake = theFlake; + }; + + settings = { + experimental-features = [ "nix-command" "flakes" ]; + sandbox = "relaxed"; + trusted-users = [ "@wheel" ]; + flake-registry = pkgs.writeText "global-registry.json" + ''{"flakes":[],"version":2}''; + }; + + gc = { + automatic = true; + dates = "weekly"; + options = "--delete-older-than 30d"; + }; + }; + + # This value determines the NixOS release from which the default + # settings for stateful data, like file locations and database versions + # on your system were taken. It‘s perfectly fine and recommended to leave + # this value at the release version of the first install of this system. + # Before changing this value read the documentation for this option + # (e.g. 
man configuration.nix or on https://nixos.org/nixos/options.html). + system.stateVersion = "22.11"; # Did you read the comment? +} diff --git a/m/common/ntp.nix b/m/common/base/ntp.nix similarity index 100% rename from m/common/ntp.nix rename to m/common/base/ntp.nix diff --git a/m/common/rev.nix b/m/common/base/rev.nix similarity index 87% rename from m/common/rev.nix rename to m/common/base/rev.nix index 80d019b..f2be747 100644 --- a/m/common/rev.nix +++ b/m/common/base/rev.nix @@ -1,6 +1,7 @@ { theFlake, ... }: let + # Prevent building a configuration without revision rev = if theFlake ? rev then theFlake.rev else throw ("Refusing to build from a dirty Git tree!"); in { @@ -15,4 +16,6 @@ in { DATENOW=$(date --iso-8601=seconds) echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log ''; + + system.configurationRevision = rev; } diff --git a/m/common/ssh.nix b/m/common/base/ssh.nix similarity index 95% rename from m/common/ssh.nix rename to m/common/base/ssh.nix index b8cb5c1..13f2d4d 100644 --- a/m/common/ssh.nix +++ b/m/common/base/ssh.nix @@ -1,7 +1,7 @@ { lib, ... 
}: let - keys = import ../../keys.nix; + keys = import ../../../keys.nix; hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts; in { diff --git a/m/common/users.nix b/m/common/base/users.nix similarity index 98% rename from m/common/users.nix rename to m/common/base/users.nix index 02680fc..71b9749 100644 --- a/m/common/users.nix +++ b/m/common/base/users.nix @@ -2,7 +2,7 @@ { imports = [ - ../module/jungle-users.nix + ../../module/jungle-users.nix ]; users = { diff --git a/m/common/watchdog.nix b/m/common/base/watchdog.nix similarity index 100% rename from m/common/watchdog.nix rename to m/common/base/watchdog.nix diff --git a/m/common/zsh.nix b/m/common/base/zsh.nix similarity index 100% rename from m/common/zsh.nix rename to m/common/base/zsh.nix diff --git a/m/common/main.nix b/m/common/main.nix deleted file mode 100644 index 4a4671d..0000000 --- a/m/common/main.nix +++ /dev/null @@ -1,96 +0,0 @@ -{ config, pkgs, nixpkgs, bscpkgs, agenix, theFlake, ... }: - -{ - imports = [ - ./agenix.nix - ./boot.nix - ./fs.nix - ./hw.nix - ./net.nix - ./ntp.nix - ./ssh.nix - ./users.nix - ./watchdog.nix - ./rev.nix - ./zsh.nix - ]; - - nixpkgs.overlays = [ - bscpkgs.bscOverlay - (import ../../pkgs/overlay.nix) - ]; - - system.configurationRevision = - if theFlake ? 
rev - then theFlake.rev - else throw ("Refusing to build from a dirty Git tree!"); - - nix.nixPath = [ - "nixpkgs=${nixpkgs}" - "jungle=${theFlake.outPath}" - ]; - - nix.settings.flake-registry = - pkgs.writeText "global-registry.json" ''{"flakes":[],"version":2}''; - - nix.registry.nixpkgs.flake = nixpkgs; - nix.registry.jungle.flake = theFlake; - - environment.systemPackages = with pkgs; [ - vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu config.boot.kernelPackages.perf ldns - # From bsckgs overlay - osumb - ]; - - programs.direnv.enable = true; - - systemd.services."serial-getty@ttyS0" = { - enable = true; - wantedBy = [ "getty.target" ]; - serviceConfig.Restart = "always"; - }; - - # Increase limits - security.pam.loginLimits = [ - { - domain = "*"; - type = "-"; - item = "memlock"; - value = "1048576"; # 1 GiB of mem locked - } - ]; - - time.timeZone = "Europe/Madrid"; - i18n.defaultLocale = "en_DK.UTF-8"; - - environment.variables = { - EDITOR = "vim"; - VISUAL = "vim"; - }; - - nix.settings.experimental-features = [ "nix-command" "flakes" ]; - nix.settings.sandbox = "relaxed"; - nix.settings.trusted-users = [ "@wheel" ]; - nix.gc.automatic = true; - nix.gc.dates = "weekly"; - nix.gc.options = "--delete-older-than 30d"; - - programs.bash.promptInit = '' - PS1="\h\\$ " - ''; - - # Copy the NixOS configuration file and link it from the resulting system - # (/run/current-system/configuration.nix). This is useful in case you - # accidentally delete configuration.nix. - #system.copySystemConfiguration = true; - - # This value determines the NixOS release from which the default - # settings for stateful data, like file locations and database versions - # on your system were taken. It‘s perfectly fine and recommended to leave - # this value at the release version of the first install of this system. 
- # Before changing this value read the documentation for this option - # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html). - system.stateVersion = "22.11"; # Did you read the comment? -} diff --git a/m/common/xeon.nix b/m/common/xeon.nix new file mode 100644 index 0000000..25d4121 --- /dev/null +++ b/m/common/xeon.nix @@ -0,0 +1,9 @@ +{ + # Provides the base system for a xeon node. + imports = [ + ./base.nix + ./xeon/fs.nix + ./xeon/getty.nix + ./xeon/net.nix + ]; +} diff --git a/m/common/xeon/fs.nix b/m/common/xeon/fs.nix new file mode 100644 index 0000000..c50b3ff --- /dev/null +++ b/m/common/xeon/fs.nix @@ -0,0 +1,8 @@ +{ + # Mount the home via NFS + fileSystems."/home" = { + device = "10.0.40.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; +} diff --git a/m/common/xeon/getty.nix b/m/common/xeon/getty.nix new file mode 100644 index 0000000..40c5701 --- /dev/null +++ b/m/common/xeon/getty.nix @@ -0,0 +1,8 @@ +{ + # Restart the serial console + systemd.services."serial-getty@ttyS0" = { + enable = true; + wantedBy = [ "getty.target" ]; + serviceConfig.Restart = "always"; + }; +} diff --git a/m/common/net.nix b/m/common/xeon/net.nix similarity index 94% rename from m/common/net.nix rename to m/common/xeon/net.nix index 1c9c569..dbfb5ea 100644 --- a/m/common/net.nix +++ b/m/common/xeon/net.nix @@ -6,10 +6,9 @@ boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; networking = { - enableIPv6 = false; - useDHCP = false; defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; + proxy = { default = "http://localhost:23080/"; noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40"; @@ -19,8 +18,6 @@ }; firewall = { - enable = true; - allowedTCPPorts = [ 22 ]; extraCommands = '' # Prevent ssfhead from contacting our slurmd daemon iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse @@ -32,8 +29,7 @@ }; extraHosts = '' - 10.0.40.30 ssfhead - 84.88.53.236 ssfhead.bsc.es 
ssfhead + 10.0.40.30 ssfhead # Node Entry for node: mds01 (ID=72) 10.0.40.40 bay mds01 mds01-eth0 diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix index b0889d8..29d495a 100644 --- a/m/eudy/configuration.nix +++ b/m/eudy/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/main.nix + ../common/xeon.nix #(modulesPath + "/installer/netboot/netboot-minimal.nix") ./kernel/kernel.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index aa438c0..fe0f5d9 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/main.nix + ../common/xeon.nix ../module/ceph.nix ../module/debuginfod.nix diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix index 9c92ef3..864efe5 100644 --- a/m/koro/configuration.nix +++ b/m/koro/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/main.nix + ../common/xeon.nix #(modulesPath + "/installer/netboot/netboot-minimal.nix") ../eudy/cpufreq.nix diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 8e19a12..04627a8 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -2,8 +2,8 @@ { imports = [ - ../common/main.nix - ../common/monitoring.nix + ../common/xeon.nix + ../module/monitoring.nix ]; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; diff --git a/m/common/monitoring.nix b/m/module/monitoring.nix similarity index 100% rename from m/common/monitoring.nix rename to m/module/monitoring.nix diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index b208139..1b9c4f3 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/main.nix + ../common/xeon.nix ../module/ceph.nix ../module/slurm-client.nix ../module/slurm-firewall.nix diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index fac678b..1b7d4be 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - 
../common/main.nix + ../common/xeon.nix ../module/ceph.nix ../module/slurm-client.nix ../module/slurm-firewall.nix diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix new file mode 100644 index 0000000..556e9ab --- /dev/null +++ b/m/raccoon/configuration.nix @@ -0,0 +1,32 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/base.nix + ]; + + # Don't install Grub on the disk yet + boot.loader.grub.device = "nodev"; + + networking = { + hostName = "raccoon"; + # Only BSC DNSs seem to be reachable from the office VLAN + nameservers = [ "84.88.52.35" "84.88.52.36" ]; + defaultGateway = "84.88.51.129"; + interfaces.eno0.ipv4.addresses = [ { + address = "84.88.51.152"; + prefixLength = 25; + } ]; + }; + + # Configure Nvidia driver to use with CUDA + hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.opengl = { + enable = true; + driSupport = true; + setLdLibraryPath = true; + }; + nixpkgs.config.allowUnfree = true; + nixpkgs.config.nvidia.acceptLicense = true; + services.xserver.videoDrivers = [ "nvidia" ]; +} -- 2.49.0 From 525cad4117a5f433bcae966f84219affca33378a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 6 Jun 2024 19:36:53 +0200 Subject: [PATCH 235/472] Add raccoon motd file Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 556e9ab..b2c7110 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -29,4 +29,34 @@ nixpkgs.config.allowUnfree = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; + + users.motd = '' + ⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⡎⢰⣧⠀⠀⠀⠁⠈⠛⢿⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣠⣴⡦⠶⠟⠓⠚⠻⡄⠀ + ⠀⠀⠀⠀⠀⠀⣧⠀⣱⣀⣰⣧⠀⢀⠀⣘⣿⣿⣦⣶⣄⣠⡀⠀⠀⣀⣀⣤⣴⣄⣀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠿⠏⠁⠀⣀⣠⣶⣿⡶⣿⠀ + 
⠀⠀⠀⠀⠀⠀⣹⣆⠘⣿⣿⣿⣇⢸⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣾⣿⣿⣿⣿⣿⣿⣿⣿⣶⣶⣦⡀⣀⣤⣠⣤⡾⠋⠀⢀⣤⣶⣿⣿⣿⣿⣿⣿⣿⡀ + ⠀⠀⠀⠀⠀⠀⠘⢿⡄⢼⣿⣿⣿⣿⣿⡟⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣵⣾⡾⠙⣋⣩⣽⣿⣿⣿⣿⢋⡼⠁ + ⠀⠀⠀⠀⠀⠀⠀⠈⢻⣄⠸⢿⣿⣿⠿⠷⠀⠈⠀⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣾⣿⣿⣿⣿⣿⣿⠇⡼⠁⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢾⣯⡀⠀⢼⡿⠀⠀⠀⢼⠿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⣿⡿⣿⣿⣿⠿⣿⣯⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⢋⡼⠁⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢻⡏⠠⣦⠁⠀⠀⠀⠀⠀⠟⠛⠛⣿⣿⣿⣿⣿⠿⠁⠀⠁⢿⠙⠁⠀⠛⠹⣿⣏⣾⣿⣿⣿⣿⣿⣿⣿⣿⠿⠃⣹⠁⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣘⣧⠀⠙⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⡿⡿⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⢹⣿⠿⢿⣿⣿⣿⣿⣿⠋⢀⡤⠛⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢹⡯⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣿⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠁⠀⢸⣿⣿⣿⠛⠉⠀⣰⠷⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠇⠀⠀⠀⠀⠀⢀⣿⡇⠀⠀⢻⣿⣿⠁⠀⠀⢠⣾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⠟⢿⣿⣄⡀⢸⣿⡀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⠀⠀⠀⢰⣿⣿⡛⣿⣿⡄⢠⡺⠿⡍⠁⢀⣤⣿⣿⣿⠿⣷⣮⣉⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⠀⠀⠈⣧⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢾⠉⠃⠀⣴⣿⣟⠻⣿⣿⣿⡇⢸⣿⣶⠀⢀⣾⣿⣿⣟⠿⣷⣾⣿⣿⣿⣿⣦⣤⣤⡤⠀⠀⠀⠀⠀⠁⠀⠀⠀⣼⠗⠀⠀⠀⠀ + ⠀⠀⠐⢄⡀⠀⠀⠀⢘⡀⠀⢶⣾⣿⣿⣿⣿⡿⠋⠁⠈⠻⠉⠀⠚⠻⣿⣿⣿⣶⣾⣿⣿⣿⣿⣿⣿⣷⣬⣤⣶⣦⡀⣾⣶⣇⠀⠀⠈⢉⣷⠀⠀⠀⠀ + ⠀⠀⠀⠀⠈⠓⠶⢦⡽⠄⣈⣿⣿⣿⣿⣿⠏⠀⠀⠀⠀⠀⠀⠀⠀⠀⠹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡓⠙⣿⡟⠀⠀⠀⠈⠛⣷⣶⡄⠀ + ⠀⠀⠀⠀⠀⠀⠀⢀⣬⠆⢠⣍⣛⠻⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣉⣀⡀⠀⠀⠈⠛⢿⣦⡀ + ⠐⠒⠒⠶⠶⠶⢦⣬⣟⣥⣀⡉⠛⠻⠶⢁⣤⣾⣿⣿⣿⣷⡄⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣟⡛⠿⠭⠭⠭⠭⠭⠿⠿⠿⢿⣿⣟⠃⠀⠀⠀⠹⣟⠓ + ⠀⣀⣠⠤⠤⢤⣤⣾⣤⡄⣉⣉⣙⣓⡂⣿⣿⣭⣹⣿⣿⣿⣿⡰⣂⣀⢀⠀⠻⣿⠛⠻⠟⠡⣶⣾⣿⣿⣿⣿⣿⣿⣿⡖⠒⠒⠒⠛⠷⢤⡀⢰⣴⣿⡆ + ⠀⠀⠀⢀⣠⡴⠾⠟⠻⣟⡉⠉⠉⠉⢁⢿⣿⣿⣿⣿⣿⣿⡿⣱⣿⣭⡌⠤⠀⠀⠐⣶⣌⡻⣶⣭⡻⢿⣿⣿⣿⣿⣿⣯⣥⣤⣦⠀⠠⣴⣶⣶⣿⡟⢿ + ⢀⠔⠊⠉⠀⠀⠀⠀⢸⣯⣤⠀⠀⠠⣼⣮⣟⣿⣿⣿⣻⣭⣾⣿⣿⣷⣶⣦⠶⣚⣾⣿⣿⣷⣜⣿⣿⣶⣝⢿⣿⣿⣿⣿⣷⣦⣄⣰⡄⠈⢿⣿⡿⣇⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠈⢡⢇⠀⠀⣠⣿⣿⣿⣯⣟⣛⣛⣛⣛⣛⣩⣭⣴⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣻⣿⣧⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠏⠀⢹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣍⣿⣿⣿⣿⡄⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣾⡁⢈⣾⣿⡿⠛⣛⣿⣿⣿⣿ DO YOU BRING FEEDS? ⣿⣿⣿⣿⣿⣿⡏⠈⠙⠈⠁⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⡿⠛⠉⣽⣿⣷⣾⡿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠌⠛⠉⠀⠁⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠹⠋⠀⢻⣿⣿⣿⣿⠿⢿⣿⣿⣿⣿⣿⣿⠿⣿⣿⣿⣿⠿⠛⠋⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠉⠁⠀⠀⠀⠀⠀⠈⠉⠉⠀⠀⠈⠋⠉⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ''; } -- 2.49.0 From a0dab66aa570b0bc94c9b9460ffca74bb16a0d1c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 7 Jun 2024 10:06:58 +0200 Subject: [PATCH 236/472] Move vlopez user to jungleUsers for koro host Access to other machines can be easily added into the "hosts" attribute without the need to replicate the configuration. 
Reviewed-by: Aleix Roca Nonell --- m/common/base/users.nix | 13 +++++++++++++ m/koro/configuration.nix | 1 - m/koro/users.nix | 17 ----------------- 3 files changed, 13 insertions(+), 18 deletions(-) delete mode 100644 m/koro/users.nix diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 71b9749..3d21b0c 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -74,6 +74,19 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" ]; }; + + vlopez = { + uid = 4334; + isNormalUser = true; + home = "/home/Computational/vlopez"; + description = "Victor López"; + group = "Computational"; + hosts = [ "koro" ]; + hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" + ]; + }; }; groups = { diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix index 864efe5..4214fee 100644 --- a/m/koro/configuration.nix +++ b/m/koro/configuration.nix @@ -7,7 +7,6 @@ ../eudy/cpufreq.nix ../eudy/users.nix - ./users.nix ./kernel.nix ]; diff --git a/m/koro/users.nix b/m/koro/users.nix deleted file mode 100644 index 4a4f794..0000000 --- a/m/koro/users.nix +++ /dev/null @@ -1,17 +0,0 @@ -{ ... 
}: - -{ - users.users = { - vlopez = { - uid = 4334; - isNormalUser = true; - home = "/home/Computational/vlopez"; - description = "Victor López"; - group = "Computational"; - hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; - openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" - ]; - }; - }; -} -- 2.49.0 From 59ab6405c529271eb2564f401db125cc43070e32 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 7 Jun 2024 13:46:33 +0200 Subject: [PATCH 237/472] Monitor raccoon machine via IPMI Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 8f0d5fa..8abacbf 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -232,6 +232,17 @@ } ]; } + { + job_name = "ipmi-raccoon"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9291" ]; } + ]; + params = { + target = [ "84.88.51.142" ]; + module = [ "raccoon" ]; + }; + } ]; }; } -- 2.49.0 From 349f69e30abb8c21b69bd30ee9b1de4b7ae82e15 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 21 Jun 2024 13:52:08 +0200 Subject: [PATCH 238/472] Add support for armv7 emulation in hut Reviewed-by: Aleix Roca Nonell --- m/hut/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index fe0f5d9..db1805c 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -19,7 +19,7 @@ #./pxe.nix ]; - boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; + boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; -- 2.49.0 From 
130e191d3751a887c841a549c95411f61730f0ca Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Tue, 16 Jul 2024 17:36:21 +0200 Subject: [PATCH 239/472] eudy: koro: fcs: Fix fcs unprotected cpuid all smp_processor_id() was called in a preemptible context, which could invalidate the returned value. However, this was not harmful, because fcs threads in nosv are pinned. Reviewed-by: Rodrigo Arias Mallo --- m/eudy/kernel/kernel.nix | 34 +++++----------------------- m/koro/kernel.nix | 48 ++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 49 deletions(-) diff --git a/m/eudy/kernel/kernel.nix b/m/eudy/kernel/kernel.nix index 016b8cb..98310ce 100644 --- a/m/eudy/kernel/kernel.nix +++ b/m/eudy/kernel/kernel.nix @@ -21,9 +21,9 @@ let # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; #}; - kernel = nixos-fcsv3; + kernel = nixos-fcs; - nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? 
"fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { version = "6.2.8"; src = builtins.fetchGit { url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; @@ -40,35 +40,13 @@ let }; kernelPatches = []; extraMeta.branch = lib.versions.majorMinor version; - }); + })); - nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; - nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; - nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";}; - - # always use fcs_sched_setaffinity - #nixos-debug = nixos-fcs-kernel {gitCommit = "7d0bf285fca92badc8df3c9907a9ab30db4418aa";}; - # remove need_check_cgroup - #nixos-debug = nixos-fcs-kernel {gitCommit = "4cc4efaab5e4a0bfa3089e935215b981c1922919";}; - # merge again fcs_wake and fcs_wait - #nixos-debug = nixos-fcs-kernel {gitCommit = "40c6f72f4ae54b0b636b193ac0648fb5730c810d";}; - # start from scratch, this is the working version with split fcs_wake and fcs_wait - nixos-debug = nixos-fcs-kernel {gitCommit = "c9a39d6a4ca83845b4e71fcc268fb0a76aff1bdf"; branch = "fcs-test"; }; - - nixos-fcsv1-lockstat = nixos-fcs-kernel { - gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; + nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; + nixos-fcs-lockstat = nixos-fcs.override { lockStat = true; }; - nixos-fcsv2-lockstat = nixos-fcs-kernel { - gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; - lockStat = true; - }; - nixos-fcsv3-lockstat = nixos-fcs-kernel { - gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; - lockStat = true; - }; - nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel { - gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; + nixos-fcs-lockstat-preempt = nixos-fcs.override { lockStat = true; preempt = true; }; diff --git a/m/koro/kernel.nix b/m/koro/kernel.nix index 016ac52..7ddf4a8 100644 --- a/m/koro/kernel.nix +++ b/m/koro/kernel.nix @@ -1,9 +1,29 @@ { pkgs, lib, ... 
}: let - kernel = nixos-fcsv4; + #fcs-devel = pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = /mnt/data/kernel/fcs/kernel/src; + # configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig; + #}; - nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + #fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false; + #fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false; + #fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true; + #fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true; + #fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = builtins.fetchGit { + # url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + # rev = gitCommit; + # ref = "fcs"; + # }; + # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; + #}; + + kernel = nixos-fcs; + + nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? 
"fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { version = "6.2.8"; src = builtins.fetchGit { url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; @@ -20,27 +40,13 @@ let }; kernelPatches = []; extraMeta.branch = lib.versions.majorMinor version; - }); + })); - nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";}; - nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";}; - nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";}; - nixos-fcsv4 = nixos-fcs-kernel {gitCommit = "c94c3d946f33ac3e5782a02ee002cc1164c0cb4f";}; - - nixos-fcsv1-lockstat = nixos-fcs-kernel { - gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be"; + nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; + nixos-fcs-lockstat = nixos-fcs.override { lockStat = true; }; - nixos-fcsv2-lockstat = nixos-fcs-kernel { - gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1"; - lockStat = true; - }; - nixos-fcsv3-lockstat = nixos-fcs-kernel { - gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; - lockStat = true; - }; - nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel { - gitCommit = "6c17394890704c3345ac1a521bb547164b36b154"; + nixos-fcs-lockstat-preempt = nixos-fcs.override { lockStat = true; preempt = true; }; @@ -60,5 +66,5 @@ in { # enable memory overcommit, needed to build a taglibc system using nix after # increasing the openblas memory footprint - boot.kernel.sysctl."vm.overcommit_memory" = lib.mkForce 1; + boot.kernel.sysctl."vm.overcommit_memory" = 1; } -- 2.49.0 From e7376917bd296498a47621bc8cf301d9cd3618e4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 17 Jul 2024 12:56:59 +0200 Subject: [PATCH 240/472] Allow incoming traffic to hut proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/m/hut/configuration.nix b/m/hut/configuration.nix index db1805c..63cfe00 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -34,5 +34,15 @@ address = "10.0.42.7"; prefixLength = 24; } ]; + firewall = { + extraCommands = '' + # Accept all proxy traffic from compute nodes but not the login + iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept + ''; + }; }; + + # Allow proxy to bind to the ethernet interface + services.openssh.settings.GatewayPorts = "clientspecified"; } -- 2.49.0 From 9c4e60c2c2cf734418ea697718fbc33b712deeb4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 17 Jul 2024 12:59:02 +0200 Subject: [PATCH 241/472] Set the default proxy to point to hut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/xeon/net.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/xeon/net.nix b/m/common/xeon/net.nix index dbfb5ea..a5d87b6 100644 --- a/m/common/xeon/net.nix +++ b/m/common/xeon/net.nix @@ -10,7 +10,7 @@ nameservers = ["8.8.8.8"]; proxy = { - default = "http://localhost:23080/"; + default = "http://hut:23080/"; noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40"; # Don't set all_proxy as go complains and breaks the gitlab runner, see: # https://github.com/golang/go/issues/16715 -- 2.49.0 From b17e4a13f973160b19880e16968431ff5de2b6d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 17 Jul 2024 12:47:53 +0200 Subject: [PATCH 242/472] Access private repositories via hut SSH proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/ssh.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/ssh.nix b/m/common/base/ssh.nix index 13f2d4d..5e12ede 100644 --- a/m/common/base/ssh.nix +++ 
b/m/common/base/ssh.nix @@ -12,7 +12,7 @@ in programs.ssh.extraConfig = '' Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es User git - ProxyCommand nc -X connect -x localhost:23080 %h %p + ProxyCommand nc -X connect -x hut:23080 %h %p ''; programs.ssh.knownHosts = hostsKeys // { -- 2.49.0 From 5ea7827a8a6113f53ed4640ddef0e16a7c3a015e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Jul 2024 18:04:16 +0200 Subject: [PATCH 243/472] Grant rpenacob access to owl1 and owl2 nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 3d21b0c..67e9b38 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -55,7 +55,7 @@ home = "/home/Computational/rpenacob"; description = "Raúl Peñacoba"; group = "Computational"; - hosts = [ "hut" ]; + hosts = [ "owl1" "owl2" "hut" ]; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" -- 2.49.0 From 58abaefbc47c3c61b0df2b21846a86aaf241da23 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Jul 2024 18:16:05 +0200 Subject: [PATCH 244/472] Add abonerib user to hut, raccoon, owl1 and owl2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 67e9b38..1484830 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -75,6 +75,19 @@ ]; }; + abonerib = { + uid = 4541; + isNormalUser = true; + home = "/home/Computational/abonerib"; + description = 
"Aleix Boné"; + group = "Computational"; + hosts = [ "owl1" "owl2" "hut" "raccoon" ]; + hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" + ]; + }; + vlopez = { uid = 4334; isNormalUser = true; -- 2.49.0 From 6e9d33b483245dc99ea5290c73dbe165406525aa Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 17 Jul 2024 13:10:59 +0200 Subject: [PATCH 245/472] Allow ptrace to any process of the same user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows users to attach GDB to their own processes, without requiring running the program with GDB from the start. It is only available in compute nodes, the storage nodes continue with the restricted settings. Reviewed-by: Aleix Boné --- m/bay/configuration.nix | 4 ++++ m/common/base/boot.nix | 4 ++++ m/lake2/configuration.nix | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 6aabe6b..5f215cf 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -9,6 +9,10 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; + boot.kernel.sysctl = { + "kernel.yama.ptrace_scope" = lib.mkForce "1"; + }; + environment.systemPackages = with pkgs; [ ceph ]; diff --git a/m/common/base/boot.nix b/m/common/base/boot.nix index a3408ab..0fb0855 100644 --- a/m/common/base/boot.nix +++ b/m/common/base/boot.nix @@ -19,6 +19,10 @@ boot.kernel.sysctl = { "kernel.perf_event_paranoid" = lib.mkDefault "-1"; + + # Allow ptracing (i.e. 
attach with GDB) any process of the same user, see: + # https://www.kernel.org/doc/Documentation/security/Yama.txt + "kernel.yama.ptrace_scope" = "0"; }; boot.kernelPackages = pkgs.linuxPackages_latest; diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 04627a8..2a29ae7 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -8,6 +8,10 @@ boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; + boot.kernel.sysctl = { + "kernel.yama.ptrace_scope" = lib.mkForce "1"; + }; + environment.systemPackages = with pkgs; [ ceph ]; -- 2.49.0 From 7ed74931cf5cf21796d59601b5a349e24fb09870 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Jul 2024 14:12:06 +0200 Subject: [PATCH 246/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'agenix': 'github:ryantm/agenix/1381a759b205dff7a6818733118d02253340fd5e' (2024-04-02) → 'github:ryantm/agenix/de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6' (2024-07-09) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/6143fc5eeb9c4f00163267708e26191d1e918932' (2024-04-21) → 'github:NixOS/nixpkgs/693bc46d169f5af9c992095736e82c3488bf7dbb' (2024-07-14) Reviewed-by: Aleix Boné --- flake.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index 5139cc1..9def45b 100644 --- a/flake.lock +++ b/flake.lock @@ -10,11 +10,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1712079060, - "narHash": "sha256-/JdiT9t+zzjChc5qQiF+jhrVhRt8figYH29rZO7pFe4=", + "lastModified": 1720546205, + "narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=", "owner": "ryantm", "repo": "agenix", - "rev": "1381a759b205dff7a6818733118d02253340fd5e", + "rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6", "type": "github" }, "original": { @@ -88,11 +88,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1713714899, - "narHash": 
"sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=", + "lastModified": 1720957393, + "narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "6143fc5eeb9c4f00163267708e26191d1e918932", + "rev": "693bc46d169f5af9c992095736e82c3488bf7dbb", "type": "github" }, "original": { -- 2.49.0 From b86798cd694bc67594b4c41d52d46c75cf539182 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Jul 2024 14:58:58 +0200 Subject: [PATCH 247/472] Use authentication tokens for PM GitLab runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with GitLab 16, there is a new mechanism to authenticate the runners via authentication tokens, so use it instead. Older tokens and runners are also removed, as they are no longer used. With the new way of managing tokens, both the tags and the locked state are managed from the GitLab web page. See: https://docs.gitlab.com/ee/ci/runners/new_creation_workflow.html Reviewed-by: Aleix Boné --- m/hut/gitlab-runner.nix | 24 +++++------------------- secrets/gitlab-bsc-es-token.age | 11 ----------- secrets/gitlab-runner-docker-token.age | 9 +++++++++ secrets/gitlab-runner-shell-token.age | Bin 0 -> 514 bytes secrets/nosv-token.age | 11 ----------- secrets/ovni-token.age | Bin 553 -> 0 bytes secrets/secrets.nix | 5 ++--- 7 files changed, 16 insertions(+), 44 deletions(-) delete mode 100644 secrets/gitlab-bsc-es-token.age create mode 100644 secrets/gitlab-runner-docker-token.age create mode 100644 secrets/gitlab-runner-shell-token.age delete mode 100644 secrets/nosv-token.age delete mode 100644 secrets/ovni-token.age diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 3cbe4f6..226099b 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -1,9 +1,8 @@ { pkgs, lib, config, ... 
}: { - age.secrets.ovniToken.file = ../../secrets/ovni-token.age; - age.secrets.gitlabToken.file = ../../secrets/gitlab-bsc-es-token.age; - age.secrets.nosvToken.file = ../../secrets/nosv-token.age; + age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age; + age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age; services.gitlab-runner = { enable = true; @@ -11,20 +10,14 @@ services = let common-shell = { executor = "shell"; - tagList = [ "nix" "xeon" ]; - registrationFlags = [ - # Using space doesn't work, and causes it to misread the next flag - "--locked='false'" - ]; environmentVariables = { SHELL = "${pkgs.bash}/bin/bash"; }; }; common-docker = { + executor = "docker"; dockerImage = "debian:stable"; - tagList = [ "docker" "xeon" ]; registrationFlags = [ - "--locked='false'" "--docker-network-mode host" ]; environmentVariables = { @@ -33,19 +26,12 @@ }; }; in { - # For gitlab.bsc.es - gitlab-bsc-es-shell = common-shell // { - registrationConfigFile = config.age.secrets.gitlabToken.path; - }; - gitlab-bsc-es-docker = common-docker // { - registrationConfigFile = config.age.secrets.gitlabToken.path; - }; # For pm.bsc.es/gitlab gitlab-pm-shell = common-shell // { - registrationConfigFile = config.age.secrets.ovniToken.path; + authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path; }; gitlab-pm-docker = common-docker // { - registrationConfigFile = config.age.secrets.ovniToken.path; + authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path; }; }; }; diff --git a/secrets/gitlab-bsc-es-token.age b/secrets/gitlab-bsc-es-token.age deleted file mode 100644 index ffe7aaf..0000000 --- a/secrets/gitlab-bsc-es-token.age +++ /dev/null @@ -1,11 +0,0 @@ -age-encryption.org/v1 --> ssh-ed25519 HY2yRg caTbx0NBmsTSmZH4HtBaxhsauWqWUDTesJqT08UsoEQ -8ND31xuco+H8d5SKg8xsCFRPVDhU4d8UKwV1BnmKVjQ --> ssh-ed25519 CAWG4Q 4ETYuhCwHHECkut4DWDknMMgpAvFqtzLWVC2Wi2L8FM 
-BGMvRnAfd8qZG5hzLefmk32FkGvwzE9pqBUyx4JY0co --> ssh-ed25519 MSF3dg hj5QL4ZfylN8/W/MXQHvVqtI7mRvlQOYr8HsaQEmPB0 -kvB7sljmmkswSGZDQnrwdTbTsN78EAwH3pz1pPe0Hu0 --> )Q-grease vHF} [8p1> @7z;C"/ -tgSUKFyyrf2jLXZp+pakigwB2fRO/WFj2Qnt1aPjtVPEK92JbJ4 ---- xzM0AhV4gTQE0Q7inJNo9vFj+crJQxWeI7u9pl7bqAI -6nGJ0B7Fbٽ2L]2zl&eKx9SWNV"MfKHUC:1b;9StDuѧϢ̟f71I(d \ No newline at end of file diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age new file mode 100644 index 0000000..b11b2d2 --- /dev/null +++ b/secrets/gitlab-runner-docker-token.age @@ -0,0 +1,9 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ +jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY +-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU +8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg +-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA +SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8 +--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18 +‡%]yi"L H`a$)9ve.0mKv u"|1c-%"WFAh$j{M-DJ%>R zF)b;}sc;GlOwWr7%1llxGBArW%Q6fxEzvK`@CnHG_6;#g_vJG856!j6N=(e~OV034 zbIbEE3Jr71OUkTpNwdsO49oV;^^Pj_%gc5PibS`~*)iPRBv8S_*T=6aGo;+ayf8Z? 
zRX?!EE!QJ5I6KcNpvp1ASv#pPG%(zyFfhW&*_X>PD#F+`*wi^aJUP4~HLJk4*wHsI zx7^jEP(LWZ-!$CJJkKvMr^+eG0>d`nU^nBGbOleJpz;i3pZv6p;N+0vlz`$;FK?5y zpehdov(%Ja({z)hLdSAPvy2S4KrYt+?PC8@4@dJdFE7`81B)y_=Yoo=;_}42k}&V= z^rFyk_pE{}Pot2iVlG`>U4`7W8YZaGb<-_Pm_$)O*byHz$VwS{A$bU1sQGVUv`3@w0-W ecPd`$H)9dFy(ahE=GM)#&Mn+{{GDU9;wb ssh-ed25519 HY2yRg hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE -avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro --> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA -FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70 --> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o -BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8 --> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t -oLG+0S1aGfO/ohCfgGmhDhwwLi4H ---- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI -Wo d;C._(u G#vgɝyYl9ϵ.0x޽N./tBbK:Q\T_txm_Jޞ- \ No newline at end of file diff --git a/secrets/ovni-token.age b/secrets/ovni-token.age deleted file mode 100644 index 4378c388dd465e6d39681bbca5354136454855f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 553 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCUlh%~AUN>_-A$`23A zDo)GIF7wp(Ov{eQiSY0;)Hm`AOESz(bu&-SbPv(bND0o*HQ*{q)s8eRb&X2$Fo|?c zOfGXaaC0xrG&Ts+&vSRlEh&#IiS(=THpz%OJ4OHKZauETk$V)xh7pJUF*9D4nagq`0)SI48Tr$=@Kr*)S?BGtkA% z-^A3zJlM(0Gbzh8t;{*xG|M-!(i7b_-(WZ6lyn7c$BKX~R|``M!<@);L%*<~j9~Y~ zf+};bEOTv>s;sj7ilnS^cgsqra7V6`vdSQXDt$9w-@KrF7tet3GT%ZM^U9n`KOb|` z$nZ$F3geKdv^4F?T(IA~bkmDc6N^(73QbJ)6e=r>O%#+0Lj$=?qbhv3baizV!c9F5 z^dpTN!#rK{GCa(RQYw#gR=1f`Si!D3=SW$xm)!n%D5i zN@(p(vjm3cj@_0PHOy!2h%MOzk;irohVPU`daU;a=K7W}SfdCXr`TOLcc1Brj_ Q4n26Eb9KfoqnGXB00g1M?EnA( diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 9844734..747b4aa 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -6,10 +6,9 @@ let safe = keys.hostGroup.safe ++ adminsKeys; in { - "gitlab-bsc-es-token.age".publicKeys = hut; "gitea-runner-token.age".publicKeys = hut; - "ovni-token.age".publicKeys = hut; - "nosv-token.age".publicKeys = hut; + 
"gitlab-runner-docker-token.age".publicKeys = hut; + "gitlab-runner-shell-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; "jungle-robot-password.age".publicKeys = hut; -- 2.49.0 From 8860f76cad0b37d94afc27692449b930a12e778b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Jul 2024 11:19:03 +0200 Subject: [PATCH 248/472] Allow other jobs to run in unused cores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current select mechanism was using the memory too as a consumable resource, which by default only sets 1 MiB per node. As each job already requests 1 MiB, it prevents other jobs from running. As we are not really concerned with memory usage, we only use the unused cores in the select criteria. Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 22ffae6..fbce762 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -83,6 +83,14 @@ in { # Reduce port range so we can allow only this range in the firewall SrunPortRange=60000-61000 + + # Use cores as consumable resources. In SLURM terms, a core may have + # multiple hardware threads (or CPUs). + SelectType=select/cons_tres + + # Ignore memory constraints and only use unused cores to share a node with + # other jobs. + SelectTypeParameters=CR_Core ''; }; -- 2.49.0 From f158cb63e8771eeeed9b18f80a40bee693f8123d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Jul 2024 11:44:01 +0200 Subject: [PATCH 249/472] Set default SLURM job time limit to one hour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents enless jobs from being left forever, while allow users to request a larger time limit. 
Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index fbce762..7b2acb9 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -47,8 +47,8 @@ in { ]; partitionName = [ - "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" - "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" + "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" ]; # See slurm.conf(5) for more details about these options. -- 2.49.0 From 504f9bb570632d6a9375fa3eb9f2bee600b2b613 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Jul 2024 13:39:16 +0200 Subject: [PATCH 250/472] Set gitea and grafana log level to warn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents filling the journal logs with information messages. 
Reviewed-by: Aleix Boné --- m/hut/gitea.nix | 1 + m/hut/monitoring.nix | 1 + 2 files changed, 2 insertions(+) diff --git a/m/hut/gitea.nix b/m/hut/gitea.nix index 22a494b..02e0d50 100644 --- a/m/hut/gitea.nix +++ b/m/hut/gitea.nix @@ -17,6 +17,7 @@ REGISTER_MANUAL_CONFIRM = true; ENABLE_NOTIFY_MAIL = true; }; + log.LEVEL = "Warn"; mailer = { ENABLED = true; diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 8abacbf..b04fa3f 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -31,6 +31,7 @@ }; feature_toggles.publicDashboards = true; "auth.anonymous".enabled = true; + log.level = "warn"; }; }; -- 2.49.0 From 28ce15d74d2f378cc26be3d60c5541f1c546a936 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Jul 2024 16:12:16 +0200 Subject: [PATCH 251/472] Enable debuginfod daemon in owl nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARNING: This will introduce noise, as the daemon wakes up from time to time to check for new packages. 
Reviewed-by: Aleix Boné --- m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + 2 files changed, 2 insertions(+) diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 1b9c4f3..ab2c63b 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -7,6 +7,7 @@ ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix + ../module/debuginfod.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 1b7d4be..e973833 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -7,6 +7,7 @@ ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix + ../module/debuginfod.nix ]; # Select the this using the ID to avoid mismatches -- 2.49.0 From d17be714ecf2c23f8aba6ea3ebfe9d7799272550 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Jul 2024 18:01:45 +0200 Subject: [PATCH 252/472] Program shutdown for August 2nd for all machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base.nix | 1 + m/common/base/august-shutdown.nix | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 m/common/base/august-shutdown.nix diff --git a/m/common/base.nix b/m/common/base.nix index f6b74ea..bb0e4e2 100644 --- a/m/common/base.nix +++ b/m/common/base.nix @@ -3,6 +3,7 @@ # Includes the basic configuration for an Intel server. 
imports = [ ./base/agenix.nix + ./base/august-shutdown.nix ./base/boot.nix ./base/env.nix ./base/fs.nix diff --git a/m/common/base/august-shutdown.nix b/m/common/base/august-shutdown.nix new file mode 100644 index 0000000..5eaa803 --- /dev/null +++ b/m/common/base/august-shutdown.nix @@ -0,0 +1,13 @@ +{ + # Shutdown all machines on August 2nd at 11:00 AM, so we can protect the + # hardware from spurious electrical peaks on the yearly electrical cut for + # manteinance that starts on August 4th. + systemd.timers.august-shutdown = { + description = "Shutdown on August 2nd for maintenance"; + wantedBy = [ "timers.target" ]; + timerConfig = { + OnCalendar = "*-08-02 11:00:00"; + Unit = "systemd-poweroff.service"; + }; + }; +} -- 2.49.0 From 5ddae068afc262da78831e0846aa8559169f984c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 19 Jul 2024 17:53:10 +0200 Subject: [PATCH 253/472] Emulate other architectures in owl nodes too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows cross-compilation of packages for RISC-V that are known to try to run RISC-V programs in the host. 
Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 3 +-- m/module/emulation.nix | 3 +++ m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 m/module/emulation.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 63cfe00..386d6ab 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -6,6 +6,7 @@ ../module/ceph.nix ../module/debuginfod.nix + ../module/emulation.nix ../module/slurm-client.nix ./gitlab-runner.nix ./monitoring.nix @@ -19,8 +20,6 @@ #./pxe.nix ]; - boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; - # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; diff --git a/m/module/emulation.nix b/m/module/emulation.nix new file mode 100644 index 0000000..ae63970 --- /dev/null +++ b/m/module/emulation.nix @@ -0,0 +1,3 @@ +{ + boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; +} diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index ab2c63b..5a87b86 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/xeon.nix ../module/ceph.nix + ../module/emulation.nix ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index e973833..9bb010e 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/xeon.nix ../module/ceph.nix + ../module/emulation.nix ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/slurm-hut-nix-store.nix -- 2.49.0 From 0911d5b92ae641705d36be3254b7c7ad4def2188 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 Jul 2024 11:02:32 +0200 Subject: [PATCH 254/472] Don't mount the nix store in owl nodes MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially we planned to run jobs in those nodes by sharing the same nix store from hut. However, these nodes are now used to build packages which are not available in hut. Users also ssh to the nodes, which doesn't mount the hut store, so it doesn't make much sense to keep mounting it. Reviewed-by: Aleix Boné --- m/owl1/configuration.nix | 1 - m/owl2/configuration.nix | 1 - 2 files changed, 2 deletions(-) diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 5a87b86..7fc4a8f 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -7,7 +7,6 @@ ../module/emulation.nix ../module/slurm-client.nix ../module/slurm-firewall.nix - ../module/slurm-hut-nix-store.nix ../module/debuginfod.nix ]; diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 9bb010e..3ea9413 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -7,7 +7,6 @@ ../module/emulation.nix ../module/slurm-client.nix ../module/slurm-firewall.nix - ../module/slurm-hut-nix-store.nix ../module/debuginfod.nix ]; -- 2.49.0 From 152b71e71842b8c4e4ac8af8048e35ce023330ae Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 Jul 2024 11:20:02 +0200 Subject: [PATCH 255/472] Add 10 min shutdown jitter to avoid spikes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shutdown timer will fire at slightly different times for the different nodes, so we slowly decrease the power consumption. 
Reviewed-by: Aleix Boné --- m/common/base/august-shutdown.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/common/base/august-shutdown.nix b/m/common/base/august-shutdown.nix index 5eaa803..624340c 100644 --- a/m/common/base/august-shutdown.nix +++ b/m/common/base/august-shutdown.nix @@ -7,6 +7,7 @@ wantedBy = [ "timers.target" ]; timerConfig = { OnCalendar = "*-08-02 11:00:00"; + RandomizedDelaySec = "10min"; Unit = "systemd-poweroff.service"; }; }; -- 2.49.0 From c299d53146b6d6ac94144c5628b9cb0cf3e30bf5 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 7 Jun 2024 10:40:37 +0200 Subject: [PATCH 256/472] Add documentation section about GRUB chain loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- doc/install.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/install.md b/doc/install.md index 66a66c9..cee67c9 100644 --- a/doc/install.md +++ b/doc/install.md @@ -151,12 +151,26 @@ And update grub. # nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v ``` -## Chain NixOS in same disk +## Chain NixOS in same disk with other systems + +To install NixOS on a partition along another system which controls the GRUB, +first disable the grub device, so the GRUB is not installed in the disk by +NixOS (only the /boot files will be generated): + +``` +boot.loader.grub.device = "nodev"; +``` + +Then add the following entry to the old GRUB configuration: ``` menuentry 'NixOS' { insmod chain - set root=(hd3,1) + search --no-floppy --label nixos --set root configfile /boot/grub/grub.cfg } ``` + +The partition with NixOS must have the label "nixos" for it to be found. New +system configuration entries will be stored in the GRUB configuration managed +by NixOS, so there is no need to change the old GRUB settings. 
-- 2.49.0 From 50ad1d637c544a636ca47f03b1c9ff2924a05349 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 Jul 2024 12:36:20 +0200 Subject: [PATCH 257/472] Remove setLdLibraryPath and driSupport options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They have been removed from NixOS. The "hardware.opengl" group is now renamed to "hardware.graphics". See: https://github.com/NixOS/nixpkgs/commit/98cef4c27326d0f9e521654441929c1c7c64f8e9 Reviewed-by: Aleix Boné --- m/raccoon/configuration.nix | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index b2c7110..2a1bc68 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -21,11 +21,7 @@ # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; - hardware.opengl = { - enable = true; - driSupport = true; - setLdLibraryPath = true; - }; + hardware.graphics.enable = true; nixpkgs.config.allowUnfree = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; -- 2.49.0 From efd35a9cd1cf0afb8d053ad5ce669fd2379578a7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 22 Jul 2024 13:34:19 +0200 Subject: [PATCH 258/472] Set the serial console to ttyS1 in raccoon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently the ttyS0 console doesn't exist but ttyS1 does: raccoon% sudo stty -F /dev/ttyS0 stty: /dev/ttyS0: Input/output error raccoon% sudo stty -F /dev/ttyS1 speed 9600 baud; line = 0; -brkint -imaxbel The dmesg line agrees: 00:03: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a 16550A The console configuration is then moved from base to xeon to allow changing it for the raccoon machine. 
Reviewed-by: Aleix Boné --- m/common/base/boot.nix | 6 ------ m/common/xeon.nix | 2 +- m/common/xeon/{getty.nix => console.nix} | 6 ++++++ m/raccoon/configuration.nix | 6 ++++++ 4 files changed, 13 insertions(+), 7 deletions(-) rename m/common/xeon/{getty.nix => console.nix} (63%) diff --git a/m/common/base/boot.nix b/m/common/base/boot.nix index 0fb0855..cfa4456 100644 --- a/m/common/base/boot.nix +++ b/m/common/base/boot.nix @@ -11,12 +11,6 @@ terminal_output --append serial ''; - # Enable serial console - boot.kernelParams = [ - "console=tty1" - "console=ttyS0,115200" - ]; - boot.kernel.sysctl = { "kernel.perf_event_paranoid" = lib.mkDefault "-1"; diff --git a/m/common/xeon.nix b/m/common/xeon.nix index 25d4121..30cf73c 100644 --- a/m/common/xeon.nix +++ b/m/common/xeon.nix @@ -3,7 +3,7 @@ imports = [ ./base.nix ./xeon/fs.nix - ./xeon/getty.nix + ./xeon/console.nix ./xeon/net.nix ]; } diff --git a/m/common/xeon/getty.nix b/m/common/xeon/console.nix similarity index 63% rename from m/common/xeon/getty.nix rename to m/common/xeon/console.nix index 40c5701..e4c3644 100644 --- a/m/common/xeon/getty.nix +++ b/m/common/xeon/console.nix @@ -5,4 +5,10 @@ wantedBy = [ "getty.target" ]; serviceConfig.Restart = "always"; }; + + # Enable serial console + boot.kernelParams = [ + "console=tty1" + "console=ttyS0,115200" + ]; } diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 2a1bc68..be9b10c 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -8,6 +8,12 @@ # Don't install Grub on the disk yet boot.loader.grub.device = "nodev"; + # Enable serial console + boot.kernelParams = [ + "console=tty1" + "console=ttyS1,115200" + ]; + networking = { hostName = "raccoon"; # Only BSC DNSs seem to be reachable from the office VLAN -- 2.49.0 From 15afbe94bd25275502f92cfc01e59bc27b134bbf Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 18 Sep 2024 15:21:01 +0200 Subject: [PATCH 259/472] Add dbautist user with access to hut 
Reviewed-by: Aleix Roca Nonell --- m/common/base/users.nix | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 1484830..40e87a1 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -100,6 +100,19 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" ]; }; + + dbautist = { + uid = 5649; + isNormalUser = true; + home = "/home/Computational/dbautist"; + description = "Dylan Bautista Cases"; + group = "Computational"; + hosts = [ "hut" ]; + hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" + ]; + }; }; groups = { -- 2.49.0 From 260986b9f204d59a479a989de758565eb938ebf9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 18 Sep 2024 11:04:44 +0200 Subject: [PATCH 260/472] Delay nix-gc until /home is mounted Prevents starting the garbage collector before the remote FS are mounted, in particular /home. Otherwise, all the gcroots which have symlinks in /home will be considered stale and they will be removed. See: https://jungle.bsc.es/git/rarias/jungle/issues/79 Reviewed-by: Aleix Roca Nonell --- m/common/base/nix.nix | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix index aef1397..dc79647 100644 --- a/m/common/base/nix.nix +++ b/m/common/base/nix.nix @@ -32,6 +32,21 @@ }; }; + # The nix-gc.service can begin its execution *before* /home is mounted, + # causing it to remove all gcroots considering them as stale, as it cannot + # access the symlink. To prevent this problem, we force the service to wait + # until /home is mounted as well as other remote FS like /ceph. 
+ systemd.services.nix-gc = { + # Start remote-fs.target if not already being started and fail if it fails + # to start. It will also be stopped if the remote-fs.target fails after + # starting successfully. + bindsTo = [ "remote-fs.target" ]; + # Wait until remote-fs.target fully starts before starting this one. + after = [ "remote-fs.target"]; + # Ensure we can access a remote path inside /home + unitConfig.ConditionPathExists = "/home/Computational"; + }; + # This value determines the NixOS release from which the default # settings for stateful data, like file locations and database versions # on your system were taken. It‘s perfectly fine and recommended to leave -- 2.49.0 From 6b282375f8e745e7fba4a67b6e830aea223e1a0e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 23 Jul 2024 16:15:26 +0200 Subject: [PATCH 261/472] Mount the NVME disk in /nvme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 386d6ab..0e875c8 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -23,6 +23,11 @@ # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + fileSystems."/nvme" = { + fsType = "ext4"; + device = "/dev/disk/by-label/nvme"; + }; + networking = { hostName = "hut"; interfaces.eno1.ipv4.addresses = [ { -- 2.49.0 From fce4d89e1d83f8dd05ff4a3ec3d309155df0798a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 16 Sep 2024 16:33:34 +0200 Subject: [PATCH 262/472] Use nginx to serve website and other services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using multiple tunnels to forward all our services to the VM that serves jungle.bsc.es, just use nginx to redirect the traffic from hut.
This allows adding custom rules for paths that are not possible otherwise. Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 1 + m/hut/nginx.nix | 61 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 m/hut/nginx.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 0e875c8..3a14eb6 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -17,6 +17,7 @@ ./gitea.nix ./msmtp.nix ./postgresql.nix + ./nginx.nix #./pxe.nix ]; diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix new file mode 100644 index 0000000..49598a2 --- /dev/null +++ b/m/hut/nginx.nix @@ -0,0 +1,61 @@ +{ theFlake, pkgs, ... }: +let + website = pkgs.stdenv.mkDerivation { + name = "jungle-web"; + src = theFlake; + buildInputs = [ pkgs.hugo ]; + buildPhase = '' + cd web + rm -rf public/ + hugo + ''; + installPhase = '' + cp -r public $out + ''; + }; +in +{ + services.nginx = { + enable = true; + virtualHosts."jungle.bsc.es" = { + root = "${website}"; + listen = [ + { + addr = "127.0.0.1"; + port = 80; + } + ]; + extraConfig = '' + location /git { + rewrite ^/git$ / break; + rewrite ^/git/(.*) /$1 break; + proxy_pass http://127.0.0.1:3000; + proxy_redirect http:// $scheme://; + } + location /cache { + rewrite ^/cache(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_redirect http:// $scheme://; + } + location /lists { + proxy_pass http://127.0.0.1:8081; + proxy_redirect http:// $scheme://; + } + location /grafana { + proxy_pass http://127.0.0.1:2342; + proxy_redirect http:// $scheme://; + # Websockets + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + location ~ ^/~(.+?)(/.*)?$ { + alias /ceph/home/$1/public_html$2; + index index.html index.htm; + autoindex on; + absolute_redirect off; + } + ''; + }; + }; +} -- 2.49.0 From 9ea7b2b4752037645ce9c555ca0c96d125ee6cf3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 16 Sep 2024 16:33:42 +0200 Subject:
[PATCH 263/472] Add p command to paste files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 1 + m/hut/nginx.nix | 3 +++ m/hut/p.nix | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 m/hut/p.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 3a14eb6..f232df2 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -18,6 +18,7 @@ ./msmtp.nix ./postgresql.nix ./nginx.nix + ./p.nix #./pxe.nix ]; diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 49598a2..553e06e 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -55,6 +55,9 @@ in autoindex on; absolute_redirect off; } + location /p/ { + alias /ceph/p/; + } ''; }; }; diff --git a/m/hut/p.nix b/m/hut/p.nix new file mode 100644 index 0000000..f53472d --- /dev/null +++ b/m/hut/p.nix @@ -0,0 +1,24 @@ +{ pkgs, ... }: +let + p = pkgs.writeShellScriptBin "p" '' + set -e + cd /ceph + pastedir="p/$USER" + mkdir -p "$pastedir" + + ext="txt" + + if [ -n "$1" ]; then + ext="$1" + fi + + out=$(mktemp "$pastedir/XXXXXXXX.$ext") + + cat > "$out" + chmod go+r "$out" + echo "https://jungle.bsc.es/$out" + ''; +in +{ + environment.systemPackages = with pkgs; [ p ]; +} -- 2.49.0 From 21feb01e7b8b07db080f851ea61e96bb56a44afc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Sep 2024 11:19:30 +0200 Subject: [PATCH 264/472] Create paste directories in /ceph/p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that all hut users have a paste directory in /ceph/p owned by themselves. We need to wait for the ceph mount point to create them, so we use a systemd service that waits for the remote-fs.target. 
Reviewed-by: Aleix Boné --- m/hut/p.nix | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/m/hut/p.nix b/m/hut/p.nix index f53472d..30bfc0b 100644 --- a/m/hut/p.nix +++ b/m/hut/p.nix @@ -1,4 +1,4 @@ -{ pkgs, ... }: +{ pkgs, lib, config, ... }: let p = pkgs.writeShellScriptBin "p" '' set -e @@ -21,4 +21,23 @@ let in { environment.systemPackages = with pkgs; [ p ]; + + # Make sure we have a directory per user. We cannot use the nice + # systemd-tmpfiles-setup.service service because this is a remote FS, and it + # may not be mounted when it runs. + systemd.services.create-paste-dirs = let + # Take only normal users in hut + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "remote-fs.target" ]; + after = [ "remote-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; } -- 2.49.0 From 53e80b1f1969a35f84273e704bee86215c8ed0e4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Sep 2024 15:25:06 +0200 Subject: [PATCH 265/472] Ignore misc directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 17543c1..a8bf5f5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.swp /result +/misc -- 2.49.0 From 81c822e68ee971918126a07f3efeede6bae39723 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Sep 2024 15:24:38 +0200 Subject: [PATCH 266/472] Log the client IP not the proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 553e06e..f42aad7 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -26,6 +26,11 @@ in } ]; extraConfig = '' + set_real_ip_from 127.0.0.1; + set_real_ip_from 84.88.52.107; + real_ip_recursive on; + real_ip_header X-Forwarded-For; + location /git { rewrite ^/git$ / break; rewrite ^/git/(.*) /$1 break; -- 2.49.0 From 56f6855af76694ea45125b5e864f33a513d57b3c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Sep 2024 15:44:22 +0200 Subject: [PATCH 267/472] Ignore logging requests from the gitea runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index f42aad7..a606894 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -37,6 +37,10 @@ in proxy_pass http://127.0.0.1:3000; proxy_redirect http:// $scheme://; } + # Ignore logging the gitea CI requesting tasks all the time + location /git/api/actions/runner.v1.RunnerService/FetchTask { + access_log off; + } location /cache { rewrite ^/cache(.*) /$1 break; proxy_pass http://127.0.0.1:5000; -- 2.49.0 From 6942f09f6919ecfa348f438c61c72fa53ecc94d4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Oct 2024 13:35:45 +0200 Subject: [PATCH 268/472] Keep host header for Grafana requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was breaking requests due to CSRF check. 
See: https://github.com/grafana/grafana/issues/45117#issuecomment-1033842787 Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index a606894..5a472e7 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -53,6 +53,7 @@ in location /grafana { proxy_pass http://127.0.0.1:2342; proxy_redirect http:// $scheme://; + proxy_set_header Host $host; # Websockets proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; -- 2.49.0 From abc74c544565e5bce23a8e2702fcbb4411dd3a1e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Oct 2024 14:39:31 +0200 Subject: [PATCH 269/472] Use NVME as root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index f232df2..b349877 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -1,4 +1,4 @@ -{ config, pkgs, ... }: +{ config, pkgs, lib, ... 
}: { imports = [ @@ -23,11 +23,15 @@ ]; # Select the this using the ID to avoid mismatches - boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; + boot.loader.grub.device = "/dev/disk/by-id/nvme-INTEL_SSDPED1D960GAY_PHMB81220017960EGN"; - fileSystems."/nvme" = { - fsType = "ext4"; - device = "/dev/disk/by-label/nvme"; + fileSystems = { + "/" = lib.mkForce { + device = "/dev/disk/by-label/nixos-nvme"; + fsType = "ext4"; + neededForBoot = true; + options = [ "noatime" ]; + }; }; networking = { -- 2.49.0 From 025f6a0c0c36a9581fed95279e3c28713b0f66d6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 21 Oct 2024 14:28:17 +0200 Subject: [PATCH 270/472] Use SSD for boot, then switch to NVME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index b349877..66cc881 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -23,15 +23,21 @@ ]; # Select the this using the ID to avoid mismatches - boot.loader.grub.device = "/dev/disk/by-id/nvme-INTEL_SSDPED1D960GAY_PHMB81220017960EGN"; + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f"; fileSystems = { "/" = lib.mkForce { - device = "/dev/disk/by-label/nixos-nvme"; + device = "/dev/disk/by-label/nvme"; fsType = "ext4"; neededForBoot = true; options = [ "noatime" ]; }; + + "/boot" = lib.mkForce { + device = "/dev/disk/by-label/nixos-boot"; + fsType = "ext4"; + neededForBoot = true; + }; }; networking = { -- 2.49.0 From 0d4eebbb59082c744488224dba51eb1c29c03328 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Oct 2024 16:13:01 +0200 Subject: [PATCH 271/472] Remove exception to fetch task endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It causes the request to go to the 
website rather than the Gitea service. Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 4 ---- 1 file changed, 4 deletions(-) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 5a472e7..4e77eb2 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -37,10 +37,6 @@ in proxy_pass http://127.0.0.1:3000; proxy_redirect http:// $scheme://; } - # Ignore logging the gitea CI requesting tasks all the time - location /git/api/actions/runner.v1.RunnerService/FetchTask { - access_log off; - } location /cache { rewrite ^/cache(.*) /$1 break; proxy_pass http://127.0.0.1:5000; -- 2.49.0 From 22db38c98f8e54c12aaa6254c30e9be2a302ce2d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 12 Nov 2024 16:30:24 +0100 Subject: [PATCH 272/472] Add custom GPFS exporter for MN5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index b04fa3f..ad0e07a 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -102,6 +102,7 @@ "127.0.0.1:9252" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" "127.0.0.1:9341" # Slurm exporter + "127.0.0.1:9966" # GPFS custom exporter "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }]; -- 2.49.0 From cec49eb5fcf332164c811bc1e7861704a9cd7174 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 14 Nov 2024 12:21:13 +0100 Subject: [PATCH 273/472] Collect statistics from logged users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index ad0e07a..ec782cd 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -76,7 +76,7 @@ }; node = { enable = true; - enabledCollectors = [ "systemd" ]; + enabledCollectors 
= [ "systemd" "logind" ]; port = 9002; listenAddress = "127.0.0.1"; }; -- 2.49.0 From d335d69ba669e6131e45078643fdbd80aa73dd13 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 14 Jan 2025 15:51:34 +0100 Subject: [PATCH 274/472] Add BSC machines to ssh config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/ssh.nix | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/m/common/base/ssh.nix b/m/common/base/ssh.nix index 5e12ede..7d7b939 100644 --- a/m/common/base/ssh.nix +++ b/m/common/base/ssh.nix @@ -13,10 +13,16 @@ in Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es User git ProxyCommand nc -X connect -x hut:23080 %h %p + + # Connect to BSC machines via hut proxy too + Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es + ProxyCommand nc -X connect -x hut:23080 %h %p ''; programs.ssh.knownHosts = hostsKeys // { "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; + "glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; + "glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; }; } -- 2.49.0 From 8190523c30d8105c60960cec478e5ecf60784bef Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 14 Jan 2025 12:01:00 +0100 Subject: [PATCH 275/472] Add script to monitor GPFS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/gpfs-probe.nix | 31 +++++++++++++++++++++++++++++++ m/hut/gpfs-probe.sh | 18 ++++++++++++++++++ m/hut/monitoring.nix | 5 ++++- 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 
m/hut/gpfs-probe.nix create mode 100755 m/hut/gpfs-probe.sh diff --git a/m/hut/gpfs-probe.nix b/m/hut/gpfs-probe.nix new file mode 100644 index 0000000..d4a0d98 --- /dev/null +++ b/m/hut/gpfs-probe.nix @@ -0,0 +1,31 @@ +{ pkgs, config, lib, ... }: +let + gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { } + '' + cp ${./gpfs-probe.sh} $out; + chmod +x $out + '' + ; +in +{ + # Use a new user to handle the SSH keys + users.groups.ssh-robot = { }; + users.users.ssh-robot = { + description = "SSH Robot"; + isNormalUser = true; + home = "/var/lib/ssh-robot"; + }; + + systemd.services.gpfs-probe = { + description = "Daemon to report GPFS latency via SSH"; + path = [ pkgs.openssh pkgs.netcat ]; + after = [ "network.target" ]; + wantedBy = [ "default.target" ]; + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}"; + User = "ssh-robot"; + Group = "ssh-robot"; + }; + }; +} diff --git a/m/hut/gpfs-probe.sh b/m/hut/gpfs-probe.sh new file mode 100755 index 0000000..b8f7f82 --- /dev/null +++ b/m/hut/gpfs-probe.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +N=500 + +t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") + +if [ -z "$t" ]; then + t="5.00" +fi + +cat < Date: Wed, 15 Jan 2025 12:43:45 +0100 Subject: [PATCH 276/472] Set nixpkgs to track nixos-24.11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index 9ff6538..ca0a530 100644 --- a/flake.nix +++ b/flake.nix @@ -1,6 +1,6 @@ { inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = 
"git+https://git.sr.ht/~rodarima/bscpkgs"; -- 2.49.0 From ae34eacf4a599ebfc6b1ee6584b694b516338545 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 15 Jan 2025 12:44:51 +0100 Subject: [PATCH 277/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'agenix': 'github:ryantm/agenix/de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6' (2024-07-09) → 'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41' (2024-08-10) • Updated input 'bscpkgs': 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=de89197a4a7b162db7df9d41c9d07759d87c5709' (2024-04-24) → 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/693bc46d169f5af9c992095736e82c3488bf7dbb' (2024-07-14) → 'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc' (2025-01-14) Reviewed-by: Aleix Boné --- flake.lock | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/flake.lock b/flake.lock index 9def45b..9d72e48 100644 --- a/flake.lock +++ b/flake.lock @@ -10,11 +10,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1720546205, - "narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=", + "lastModified": 1723293904, + "narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=", "owner": "ryantm", "repo": "agenix", - "rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6", + "rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41", "type": "github" }, "original": { @@ -30,11 +30,11 @@ ] }, "locked": { - "lastModified": 1713974364, - "narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=", + "lastModified": 1732868163, + "narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=", "ref": "refs/heads/master", - "rev": "de89197a4a7b162db7df9d41c9d07759d87c5709", - "revCount": 937, + "rev": 
"6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f", + "revCount": 952, "type": "git", "url": "https://git.sr.ht/~rodarima/bscpkgs" }, @@ -88,16 +88,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1720957393, - "narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=", + "lastModified": 1736867362, + "narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "693bc46d169f5af9c992095736e82c3488bf7dbb", + "rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-unstable", + "ref": "nixos-24.11", "repo": "nixpkgs", "type": "github" } -- 2.49.0 From 371b0c7e766f2ac80c72687bfcb5897bdaac56a0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 15 Jan 2025 13:16:10 +0100 Subject: [PATCH 278/472] Fix MPICH build by fetching upstream patches too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- pkgs/overlay.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index a36961b..5977ab0 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -11,7 +11,7 @@ final: prev: paths = [ pmix.dev pmix.out ]; }; in prev.mpich.overrideAttrs (old: { - patches = [ + patches = (old.patches or []) ++ [ # See https://github.com/pmodels/mpich/issues/6946 ./mpich-fix-hwtopo.patch ]; -- 2.49.0 From 2f6f6ba703c941f3d8f2382d8927874ace09e8bb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 15 Jan 2025 14:38:57 +0100 Subject: [PATCH 279/472] Update PM GitLab tokens to new URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- secrets/gitlab-runner-docker-token.age | 16 ++++++++-------- secrets/gitlab-runner-shell-token.age | Bin 514 -> 516 bytes 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/secrets/gitlab-runner-docker-token.age 
b/secrets/gitlab-runner-docker-token.age index b11b2d2..103d6a6 100644 --- a/secrets/gitlab-runner-docker-token.age +++ b/secrets/gitlab-runner-docker-token.age @@ -1,9 +1,9 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ -jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY --> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU -8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg --> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA -SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8 ---- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18 -‡%]yi"L H`a$)9ve.0mKv u"|1c-%"WFAh$j ssh-ed25519 HY2yRg 0sEIUEJBJQ0k0rBfHaOEbq1pNBqsPin4Xq85v0ds9jY +4wzjLapoOcq53nT2K3hSGED4jTDXci25GLHkl/fL4EI +-> ssh-ed25519 CAWG4Q f68ZbJGwXuCZVnqhwbh+8dh0X/MCdjEd+sVtPyBu/hU +u2TQreyWQvP6PGuwuUNKA/AL68560flqSlaItN3k41I +-> ssh-ed25519 MSF3dg HdrtRW2j7mfkLH0/4aJK5R0cWdjf56HYtEZgzHi9EAs +A6MF6tXmSUq2RF2bpmav0GFTRERwluSZGh2snP/KqkA +--- drsezqi7J/g8gm6N10SkfeAWnYct99WUraB5djLJqpo +g (J!M63eA?\1yeFN\/MN`K^+"Y^>dH]PJ`x{ڱySyPX{wS ^5XJP;v \ No newline at end of file diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age index 634320c8c4eedab6bc37027744fdfe4a6808ebda..aaf939c41cac69729b7c7b56515c4dc741da056e 100644 GIT binary patch delta 462 zcmZo-X=A4lj=9HcvX5!;&=IW^J z<&j#I9g&{S8>bzQ69GmR(e69;~<(ZQPJB8vO-hW!CsT-nmY?aurc+1}kQ|zBMKKPxP H)Mf|(c|N9O delta 460 zcmZo+X=0h6Q*Y^0SQs8+T2hu%;S?5_o);CAnVeQ+U>0STWf)>wqFS;pFVg>6z9oF1MWUXhwr;9KnI8<<<}>QSg46yR?fZf2h67noD!lw=V( z@uPUXr%zCMhOtk6T1IekNO4L)aj2KKNm@{qhk;pYO0H?TNm8L>xuaP|hFc()Yk+pK zf2oI~d6}1&YrcU+mY;J$MOATmVqQs@cXoPFXt;Y;L6)acNK`SGuCA^^c~WU&xrdu+ zL_t|nP)=!@X+~jsSy;YLigBVro^PpTMU;DWYCvwPZ$X$RSL`xJZ8?^2zn`*iVK7H&NL&aqnY F6aYrKsYCz( -- 2.49.0 From 2b048123202725cb1c5e62a58da6c0d5a852f800 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 11 Feb 2025 12:56:30 +0100 Subject: [PATCH 280/472] Add new fox machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 1 + keys.nix | 3 +- m/common/xeon/net.nix | 4 +++ m/fox/configuration.nix | 38 +++++++++++++++++++++++++ m/module/slurm-client.nix | 6 ++-- secrets/ceph-user.age | Bin 1002 -> 1023 bytes secrets/gitea-runner-token.age | 16 +++++------ secrets/gitlab-runner-docker-token.age | 17 +++++------ secrets/gitlab-runner-shell-token.age | 16 +++++------ secrets/jungle-robot-password.age | Bin 477 -> 477 bytes secrets/munge-key.age | Bin 2007 -> 2006 bytes secrets/nix-serve.age | Bin 641 -> 534 bytes 12 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 m/fox/configuration.nix diff --git a/flake.nix b/flake.nix index ca0a530..037bba2 100644 --- a/flake.nix +++ b/flake.nix @@ -25,6 +25,7 @@ in bay = mkConf "bay"; lake2 = mkConf "lake2"; raccoon = mkConf "raccoon"; + fox = mkConf "fox"; }; packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { diff --git a/keys.nix b/keys.nix index 681fcbc..a2b8c2c 100644 --- a/keys.nix +++ b/keys.nix @@ -9,10 +9,11 @@ rec { koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox"; }; hostGroup = with hosts; rec { - compute = [ owl1 owl2 ]; + compute = [ owl1 owl2 fox ]; playground = [ eudy koro ]; storage = [ bay lake2 ]; monitor = [ hut ]; diff --git a/m/common/xeon/net.nix b/m/common/xeon/net.nix index a5d87b6..ce1d3bc 100644 --- a/m/common/xeon/net.nix +++ b/m/common/xeon/net.nix @@ -85,6 +85,10 @@ 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 eudy-ib xeon08-ib0 10.0.40.108 eudy-ipmi xeon08-ipmi0 + + # fox + 10.0.40.26 fox + 10.0.40.126 fox-ipmi ''; }; } diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix new file 
mode 100644 index 0000000..38d69be --- /dev/null +++ b/m/fox/configuration.nix @@ -0,0 +1,38 @@ +{ lib, config, pkgs, ... }: + +{ + imports = [ + ../common/xeon.nix + ../module/ceph.nix + ../module/emulation.nix + ../module/slurm-client.nix + ../module/slurm-firewall.nix + ]; + + # Select the disk using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103"; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; + boot.kernelModules = [ "kvm-amd" ]; + + hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; + hardware.cpu.intel.updateMicrocode = lib.mkForce false; + + networking = { + hostName = "fox"; + interfaces.enp1s0f0np0.ipv4.addresses = [ { + address = "10.0.40.26"; + prefixLength = 24; + } ]; + }; + + # Configure Nvidia driver to use with CUDA + hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.graphics.enable = true; + nixpkgs.config.allowUnfree = true; + nixpkgs.config.nvidia.acceptLicense = true; + services.xserver.videoDrivers = [ "nvidia" ]; +} diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 7b2acb9..d745310 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -43,12 +43,14 @@ in { clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" + "fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=2 Feature=fox" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; partitionName = [ - "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" - "all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "all Nodes=owl[1-2],hut
Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" ]; # See slurm.conf(5) for more details about these options. diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age index f23e2ff0558e216abb9940ca7ebc26287849466d..880fbbf33f23b68b298d1efcfde58e2581d46ba7 100644 GIT binary patch delta 936 zcmaFG{-1qe?v1dqVgs)F|VN|(kc5-%ZiCboIcwu6yexQGdOQBh5UZQJQ zR<@C0M81GgPPv(3hGmY)WxiqF1(}suK~X8jzPTQTQIWpcp5eaQCh3k`z8U(Z zNigsU?x#p1#IaF6GX7E~UO6KIXaZzFFSIIT>ad>E(`-;~B-n(=APk%zdLg zolJuY%Ca+yvoey+^}U@evh&K*1N?%`9P>>olM2gx%$z*A5{tu~qTCIVLj!U`%*u<3 z^MVrn%FA7iQ#`WW4J#|7vdmIV-9mzV3rk!lpJf!U_tbVa$qe^)^A9d?t*k69H#Vt^ zDk!P&^365#Ov`uA4K>%V@U}1rPsw!T3a#>p@-7Ups3=T~a5lCGH1RLbsPym+GWB;a zjLHnj^h)>eOAE;jHgonwj{)rv*Rb$F1;Yr}Aop_1ob>$Md`II-FR%Iv^YlEAFwZt7XA9T^qwrSItC6KKGttE;Qv zT3MLx6Vd<=2 z3z%OqtTA?F_9{bKJN8}z?4&*N%&W;B_t@xrnad0S4qsI$6~7ab=Y+BD@^ W)3(hrFZtAL!abK?l>NcIj28f9#X8sk delta 915 zcmey*{)&BqPQ90bL84ide@1p`L3V~usA+hhLAHy5sfVkZiMMxgaIsNFd6tKFQeK`% zD3_O^Nm#z0zjL8aL2z=Izq!9pL{ec$vX7@pNpe`IM~P{2VWfUUU~ZCoF_*5LLUD11 zZfc5=si~o*LUFodVy>e?wndPBQgCQWSy*n4lc!mwhg(3Vsh?9|fRnqgu~|-lS9x(} zk%fz!dwPW{mr0bjvvGQ4S!S}CV_1=&d%9m%c~wbLXjMi6m^0SNF zJ@Z2ge9{6Uvpj<;48tsqid>B|qQVW0^Sn~iv~!HJodVOjd{VOOJ#Ze51;;^OCuIJzT;ovO>#EQv5QV4Kqx9i$X0+y}gq?jf={&D;%A24YW<&B7LK> z^Rq$L)mh|Po2Tif7o{c^r*h@Ec$Vhmq&Zt!1Z5P2M49+zTbPHWXor;qPnc$cP!X6J^P7MX+=rkGc{YMa(O7CD-wW*6ymx$8%mxH@v_>gp;uSD6%-Sw@xS zIVFdsL^4T8I3Rwi!m{$uh1aeu3;)$$_GJ?=kGm0l!M9KQq}t0z-}t!p Oii)dPl=!Z=un7P?#yQ3S diff --git a/secrets/gitea-runner-token.age b/secrets/gitea-runner-token.age index 1c799cf..31d52cf 100644 --- a/secrets/gitea-runner-token.age +++ b/secrets/gitea-runner-token.age @@ -1,9 +1,9 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA -Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs --> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8 -7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw --> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I 
-GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU ---- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8 -1Mw4 :H@/gLtM,ƥ*zNV5mNoj1 $TG_E{%1ǯHAp \ No newline at end of file +-> ssh-ed25519 HY2yRg eRVX5yndWDLg9hw7sY1Iu8pJFy47luHvdL+zZGK2u1s +e1nXXiMW0ywkZYh2s6c7/quGMfBOJOaRhNQDjCD2Iyc +-> ssh-ed25519 CAWG4Q gYG7GRxRpJ0/5Wz0Z0J2wfLfkMFNmcy81dQEewM7gUA +lamdUdx+xOFWF1lmUM4x9TT0cJtKu9Sp7w9JHwm13u0 +-> ssh-ed25519 MSF3dg HEzfpR8alG6WPzhaEjAmmjOFoFcMSQUldx46dBsXri4 +OAD5H/zZGhfevYrFJzJrbNKPomKZDOS9Qx5tmTp78Jo +--- A0sMSiNXWaEIgRXR0x6UAIaluuVH6Zlv4CJ9sI0NXOw +6ph{>F|iv E}{ruƷ}^>c6j gGW:J3||Z \ No newline at end of file diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age index 103d6a6..cd1432e 100644 --- a/secrets/gitlab-runner-docker-token.age +++ b/secrets/gitlab-runner-docker-token.age @@ -1,9 +1,10 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg 0sEIUEJBJQ0k0rBfHaOEbq1pNBqsPin4Xq85v0ds9jY -4wzjLapoOcq53nT2K3hSGED4jTDXci25GLHkl/fL4EI --> ssh-ed25519 CAWG4Q f68ZbJGwXuCZVnqhwbh+8dh0X/MCdjEd+sVtPyBu/hU -u2TQreyWQvP6PGuwuUNKA/AL68560flqSlaItN3k41I --> ssh-ed25519 MSF3dg HdrtRW2j7mfkLH0/4aJK5R0cWdjf56HYtEZgzHi9EAs -A6MF6tXmSUq2RF2bpmav0GFTRERwluSZGh2snP/KqkA ---- drsezqi7J/g8gm6N10SkfeAWnYct99WUraB5djLJqpo -g (J!M63eA?\1yeFN\/MN`K^+"Y^>dH]PJ`x{ڱySyPX{wS ^5XJP;v \ No newline at end of file +-> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A +FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8 +-> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM +Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo +-> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4 +CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8 +--- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8 +#H$F;%62rfX\Dn шȉx>&;cUI=M?TǸ"pxӭ\sbFWD{ +AW>?UHԳ \ No newline at end of file diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age index aaf939c..7c718fd 100644 --- a/secrets/gitlab-runner-shell-token.age +++ 
b/secrets/gitlab-runner-shell-token.age @@ -1,9 +1,9 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg VY8s9s1zuHOv2axmIacwKg2ozsJnskHTQtslRZ3YI1M -fKkJuydLOzF/ciPYSYu4ziSCozdl6sowvDMYZmxqmHY --> ssh-ed25519 CAWG4Q 2ARFd/7RWQ/QOk47FnJFChaVBgoV4LE6EA+JHezkXgg -MV4g4Llv8Qcd/wUgJyoNG5AXb6o3aFTrOYGC+lXlSzw --> ssh-ed25519 MSF3dg SKoxWe8Mi8EkBjkESxStOCI5V4C0KYEXIOx7OdENgTA -p/owKwQ4e4pcGV+hqej2AfPU5QaM2i8VfxhlkjCM4Z4 ---- 0VWKU5CQiGbiOtQ2tsZZg88oZm1qcUDEnU5zDTtV+KU -ucl`M'Vk6Y!=Ly-OAzƘVEKR_qL|1V[)qƓLDy0_q)-T,ƪ_9 ?ib1 \ No newline at end of file +-> ssh-ed25519 HY2yRg xWRxJGWSzA5aplRYCYLB6aBwrUrQQJ2MtDYaD75V5nI +J07XF3NQiaYKKKNRcNWi9MloJD2wXHd+2K7bo6lF+QU +-> ssh-ed25519 CAWG4Q jNWymbyCczcm8RcaIEbFQBlOMALsuxTl4+pLUi0aR20 +z5NixlrRD+Y7Z/aFPs6hiDW4/lp8CBQCeJYpbuG9yYM +-> ssh-ed25519 MSF3dg QsUQloEKN3k1G49FQnNR/Do6ILgGpjFcw3zu5kk1Ako +IHwyFWUEWqCStNcFprnpBa8L5J6zKIsn+7HcgGRv3sM +--- oUia0fsL6opeYWACyXtHAu/Ld+bUIt/7S1VszYTvwgU +V*t2-7h&͢_!տ+(n (/}CNͷ|Nu5ù勚Kl"klOXyAe$ \ No newline at end of file diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age index de9bf22ab4a215dacd44608e30f8f45b8b8c8cb7..7b8cd84284e84acf2ffa496148466fc590495dc0 100644 GIT binary patch delta 423 zcmcc1e3yBGPQ6=&fw5_&t6#8-r?$SAbEsFUai+FkRz$gDL`aBPW?r7VWr}aAg@2y0 z1($!Nho_OBL5P8yacYWVNM*WvWVuhScdDzee`ZQquy$H%Mp0%_X^~l&E0?aFLUD11 zZfc5=si~o*g0o||yGfuzh>L%+pL3SJg?UD$cU7pqgS-Ua2!rT#&oA))CW1sMS)g<-)NhW`2`5mhFR<}PU-fxeNYNg)^6 zULIcVDLKg@SsAXCx%nm)ex4P^j^UZkj#ZXnrDb`RPR_}ZTx|dOCi!`4+pV0;uvXLR z@I3Z)f1M0nmaH=@{%~z}z`_O74g7Rngy%0{+oyS7YQeqTFWKv5r<`lu+2#Jyqf24C T5l2f4=Zi=BxenCN&exSw;C=y1KdwJ{h6e`2~I@ z&c&4;&OTMHkyTD^#oA@5+5Xv%K_*VdVNQu*nZ9oNQEnFHT=}QxwqAd*`;c?feUHZv zAN>4VIbGW4#uU#I-xpV(ZuPvrQ9`KLSLj&GSwr28-cvdRmUOyX)qkBS%rtiy*WGln Thr4bj6}auWyzQ)NQ0DMNnQe?UTS(-l)h_XVxV(+Ub?Aextp7jZ&9ZmS=Iemy<_kMOt27il3vSkBfh1MPOq7 z#E;_P!LHe9o}RgxktW^+X~u~@hEW;Lj^UNs+Kwd_PF4N}W)&vsu7*x-l{wj5=E;#> zDG}i%RhebsL74%*720LSKH(X@-hs}p{$2%XMH%kluI^z@QTdUR;~B-n104%oe0)>W 
z&D?WJ9i3cCaxIFzJS+TClMTErLxMt#jWb=0U46p6^D{lUObc>zi~R!)JqwG%%ZiEv ziZcwe{1Zb>ydy#j0>WGk%9Dc3JuAHn@>~rjpJf!UcQW%2D$4W;42(*O2r7=$4@)*o zD|B)8G)~E|Fpf$~ad-1{FGM zvJl6N%&bDcsM74HfN=9*PcGlG^ziJm0E1lP6yu7hic*UzQ?Jb6s-n#N;Ph1g%z`A} z2=9;x%gRU>S9I%~9mCyC0u@qS49YXJD?Cio3tT+2!$VB6wDa^+48678Bhr1%GK!r- z>%+sGN(-}#!d4t4R%E%5em4f81S zugK>LHO$B}2=J^gj!bna!WQS%P?}R3^(%R($&>f zNXqpnG;lMpH1^X^_DxT7O^zt43et8=&Cd@s3Qlq?u}DnucP|Yri^xyrGUMOdYu8;W zcJ6;&n!$o4v#-d`)9zhsaCGUJ%a$jZ9fJNw&zdqjq4rnAwfcqXFHEd{8^uqWD9u;I z`oemaU>bmEaC&s_$aOzEcE7ONj@_z() zg#QL+%yBvGn6+<@W1mUCRjuQOg~v7Juf6*ED6G?P&lKAphl?9@nLfnst>pRWa*=V~ z;-joT-G8i7Ex361w5-Tifg|E&bvp~f9-h#dclyIiA?q#s#6OB3-SBhK@d*m`yH2tl z5J;@rk~HQ2-B+h?YxBpyc(;b{Re(Xmvpuss=Er_GH!D1N(aJQ%=$2!DqQBqWJ*Ab4 zMKj$jWXiS6QBvnhz8&g45+qR&)Mpg5^j7NQO0}v~f$raPdP^C&tG4og$emN^dUeJD z9>sNM`KG!lUNCRvXXQQacU@;A%MU}oH7xZ~>8DGZE$<1)UjG}vA*!V*2)o59ew(sTE)oTiJIxWsgPARo!Y%tRMbS|X9=oAyDtxR#yLQSKw5(^%S*xjB8?7zaxmoF=i_GGGO9H+zsXY!#`IKJInT5uiN*)qkhdDe>~kq#`PR$Bv41w<~Ie%)z<-j-G^1;bTbavH3QCj8}o zbNFS0_GRDYLN$}3IpY1F-71{DVMEHjbla_O{3S2ew$aHA?*J3QvJ$l=bzo{Ph5LOa_X!t(X&6ViWNBEyn5EunVV;BWKAypdtmv# z4`*&&D=AD)T$yu2n1#n>&vJ_szn49_aBD?Cf^!Pr>7R#f<5r#d@UT(msH<#J$@#mC zd4Wkh?rL9~CqInV3Vig|2m=VmF}GbSC6jHkruEv{ae^n+I3DeK*slW($=%< aFHSkdqO?m-h;!#8|L${N_m`~YHwFOQQYc#h delta 1927 zcmcb{f1Q7VPJMobyQyhvN?BH7mb+!DaYa%}WVyMaQLbBFm~&cckx5}`PN{FcvA>0f z1($JFmXmh2c5s@hnXj*bX=ftW1&~1yFqDWKvGaZKt@@%wwtqOiltd$ctLo!V@SGzhkH_y zsgYYyZm6>fmy1W0TTZ2Oo}r7Wsj-)Np+TsTp=YJ9qlbQ2qHnHGWMqC$NoHAzc0^+6 z#E;_PRYA^Gj{XH%W!@nn+J%{siKd<=ZpB3rRhcQJ9>(GR&R(U3QO4$lK6w^gW~rX# z$)$P8;Rc1dzB%PCC3!hM?&XeI`j%ff(_iWEle_ey^9M>e0*KPLK8DhQk~5!jjKYrO1%orQmc$plRf>k3oU|* z%k=a8tD?dj-J|?0-JJ4+4FW?mf?Y}yT}=WfpJf!U5A(=QH?VY!aPmxxEC}>8%8Sg5 z^a#nR$Z^SVF)?y23Cj&Gj?(rH3Cu|6DvR(jFv~D3ax70X$ThAobj{9kbT=&XH_x|l zD~k**DGN_2D9g^Us0M6y zNV6h;&+L-O;s~RPkesNHbS_`N)} zAlx{}y)fA)+|xZIr^qBJGBr1}KFhb=XJS63k{Sl_fr+chdQ$tc6vwLB=oC^glwJh8+z*U3C8yU?;MIp563G1o6Q zGo33+`cmp7i$2|H@*P!ik1p}wQfvy|@b|T|$D0oe=H-@MtD2{K_<}?92HAQa+4Fqc 
zSc92vx=oZ;KjwXy;kMoS>vQ;B%fsH=tbY=cz;kXv#m0h_GE)=&-*N4^v%vGd>XxZD zWW>GHuh+h?j&fo8e6M-#k2eC6MwtiqM6d)Ie7e7G8LzKuntxvp^XlS=)!7mkS<`>& z|9a-I=)05hgZNJ_75W>Vxh~`^zW2AIsGhZEW%M-<)kfXZEa%$z#eUY#53JV}eqzPh z=KMR~!HA)Vo_`hx{|F-LGSRimJ@mrB)vqS`zkkFZ)fy{!^1aeHAL= z*>ychVDc9|-9XlNuPwga;pVi`{c+;pyZ_s^n1(MgD#%_@@lE-I&Kj$4Ix{SL-#(jv z?MmUT*TEk=gSXkNcQ^60>vnn_>RHlo+}cZ->yl{o zD%NV>ZE=^AcUCX-sb@Fe!t$c}yx-l1#R``+P5D(Ul4tN+`b_WMcyeb&;#To3r_MeK z$lf?pBz~Sp=tY-`$$OJ79nUz?C>3FT;a`ownMvBaJ{oE7%?r#YeAwhR z=+2)u-Qdl~KEVqQJ2@+q(IR*v1j=JAVgJ9+^k(XUUYTslcm@l@5-OJ||+Ms+O!epm-ajx7wZ4ZsK zx;^dl-#hECcy)QgwKoRO6V?@c-mP_e8~21`YH4goraWhVzF3Fz;QbG3jPX0`{;0+u z*kIo`VeV|{rX?%cI9mAc>(1ZweP{KVg`1Pk@NP*8j%GL5#Nw~{Ve6-N`~|@AlWF{&Ck_4xFXcg$Slbuyez`c!!p0bBP%DWtjt$GpwJ?}$T=vV zD=Wp()5jw-+t@HQ%Og3}J0+mVr7SDkpu*5QJ=njv%&a6gBQm41I432POV>`JxHv;M zHO0u()X-AF*)iPRBv8T8K;J*R$UG=i-^nj0-5||1%2hwKAWB=?EITWt#6Q&8xgx+Y zvDCRJz|fT|J)_($Kiw=cF(e?zCDb+Fv^1rs+^oCeUJJA?Q{dDqU=bnWA{(aIUibDc_)ZjP4Je0 zV(Y}qTOLhzmfd#dWSQ9I=n&!ji`?}IYUN^l=GTrtUHW;$^@1{AfeS~kY(Dx|>Bh7e z;npA5Tw5~^9LVcw7PHyqSHHHs!O)SOxXDjM_D&Ap&$(Wcr!PG3f9J>r cg=H_EX$j<(&;I-O75me@?8i2Gy;M1& delta 587 zcmbQn(#SeNx8Bgi#6>%-tRf>a-Mhp?U%%AFsL-!8EH5&`EGfg>B(gBO($_dD%c(q+ z%fK@=EZ5C3%_P(!IM*P>z|h|;s@%&k%qS~IKR4Jn$E4iNz&F6ur^qv%OV>`JxHv;M zHO0u()X-AF*)iPRBv8S_B+t8|G*#a@OW)W#)XmgU+f~0Z-NQF4%dx0Dy}~%yGrT+^ zxzMeoGBlsdB-_-_E88cus5I3lGBY5%$jHaBJkTdRw=~h;v)Cm!Oy9z+D6y;{CpC29 zNAY@(g7i>FHxHkHVv}-%{!Ez%={3$k+b6U{s;EVa`u>qEWrLJD#WqdfHUOOw-rEDQ|G(p)Vn&COlH zjJztU^dr2=t12QZyxmQ?B3&~wL)?A+k|N6W!^~Yx6T^b5%$!n^xpZ}P72HcQJ-nTx zQaqe03oWw@oPvE)OifGzGLl@&L;MVlEnF=^+)^qs3sN&3xt8u<+}(25T_t$ep6ccx z$7cujg=EC$9kJ-u`ROsI_{95Lwgr1;-c;Y+`$hYUc-Gg;UwbS!YJRku!mKGeamSp< zNB=HbZ1%kI)xs)2r}y#{yTe(BT=h4kM d%g)V_kduCJHS|}pp_%mYeG7ehlD@Cl1OV#6+5P|k -- 2.49.0 From 14b192b1d9147054c5ee6f1dde1619ff26df5a91 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 11:36:53 +0100 Subject: [PATCH 281/472] Add fox IPMI monitoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Use agenix to store the credentials safely. Reviewed-by: Aleix Boné --- m/hut/ipmi.yml | 13 ------------- m/hut/monitoring.nix | 17 +++++++++++++++-- secrets/ipmi.yml.age | Bin 0 -> 1184 bytes secrets/secrets.nix | 1 + 4 files changed, 16 insertions(+), 15 deletions(-) delete mode 100644 m/hut/ipmi.yml create mode 100644 secrets/ipmi.yml.age diff --git a/m/hut/ipmi.yml b/m/hut/ipmi.yml deleted file mode 100644 index 0d68a53..0000000 --- a/m/hut/ipmi.yml +++ /dev/null @@ -1,13 +0,0 @@ -modules: - default: - collectors: - - bmc - - ipmi - - chassis - - lan: - collectors: - - ipmi - - chassis - user: "" - pass: "" diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index f7c7421..ccfb575 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -12,6 +12,8 @@ mode = "400"; }; + age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age; + services.grafana = { enable = true; settings = { @@ -73,8 +75,8 @@ enable = true; group = "root"; user = "root"; - configFile = ./ipmi.yml; - #extraFlags = [ "--log.level=debug" ]; + configFile = config.age.secrets.ipmiYml.path; + # extraFlags = [ "--log.level=debug" ]; listenAddress = "127.0.0.1"; }; node = { @@ -248,6 +250,17 @@ module = [ "raccoon" ]; }; } + { + job_name = "ipmi-fox"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9290" ]; } + ]; + params = { + target = [ "10.0.40.126" ]; + module = [ "fox" ]; + }; + } ]; }; } diff --git a/secrets/ipmi.yml.age b/secrets/ipmi.yml.age new file mode 100644 index 0000000000000000000000000000000000000000..ec99e58f0e378b4bc7c7cdf61e3e3f668d1aa412 GIT binary patch literal 1184 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCUlh%~AUN>@nFFb?p{ zDJw5C*H14m^mGjOst7f53eq>NFbq#|3h>e|HI59(jL6I~EatLs4oEeyG!F4~OEF7I z4+~6obS`lUvdr_yC=2w?4=u3F2=y~DboM9+OGmfO*)iPRBv2tYB(cQV!^qn**C(Ph zx5P3qDbg^a$}p+IFW3A@aM-leWRsRhQ##ih~m9=N> zA2ogNllsEwvcsxcc;S!5Yx$QybW}1@Q?A}xd|}E_!*bsMr-YwHAFZp7*Cfn$`cWM5 
zc8Tq@t`~-fCJ3A9?_Kjq=K41URjUqGm-n^F&ZY7<(y#8;T;3_a=XbW_{A026zMhi5 z8Oqu(Y_a}ut^0$|6Hoo`H{Ouv8KJ#0^iFp3>NvM;+0zTA{#Cyza=WtVsrmco`}1E2 za?QDR|73Of^;@4N)QT*#`LtEktVh1M;^l$0e2;eMmh~ly-J8gHTIHQUWUFl}OUh%% zvv(@Csz2|$^jX#9x!wMCr&zr+EK-tL=Uwh)4RkqqyDom;%48+`f_LllCkK5$R_V8M zceZNlE4hhm=C=J&jIToNIWFCt!&U8{(O-L%H(sd2i+6hPeI7~97e|t%k7bo+WXP3G zX80?0$?nrv?U1Euhxc2%>Zu>^nfi2fh}%gK`Fre=2}uktC6iWncP!OYd=^?eU8{(J z@q>ps|Fx&-Gu|(Xu1H?>{>h4<6GzUR+xkKCXZ7a!?|2{AA7c04JNJ!@@O_scz&OuW3~C;RK}&Hv(Ztlcy=^?Gl^yWB)`h>%j3h#dK`i;PdxO%faR^Y KxWVL#@=gF=iv}wI literal 0 HcmV?d00001 diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 747b4aa..ac3eac3 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -11,6 +11,7 @@ in "gitlab-runner-shell-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; "jungle-robot-password.age".publicKeys = hut; + "ipmi.yml.age".publicKeys = hut; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; -- 2.49.0 From 4f5c8dbbafc92d20859eda4a530dffa2cce0b244 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 12:14:40 +0100 Subject: [PATCH 282/472] Use IPMI host names instead of IP addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/xeon/net.nix | 20 ++++++++++---------- m/hut/monitoring.nix | 4 ++-- m/hut/targets.yml | 22 +++++++++++----------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/m/common/xeon/net.nix b/m/common/xeon/net.nix index ce1d3bc..965bc1f 100644 --- a/m/common/xeon/net.nix +++ b/m/common/xeon/net.nix @@ -34,37 +34,37 @@ # Node Entry for node: mds01 (ID=72) 10.0.40.40 bay mds01 mds01-eth0 10.0.42.40 bay-ib mds01-ib0 - 10.0.40.141 bay-ipmi mds01-ipmi0 + 10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi # Node Entry for node: oss01 (ID=73) 10.0.40.41 oss01 oss01-eth0 10.0.42.41 oss01-ib0 - 10.0.40.142 oss01-ipmi0 + 10.0.40.142 oss01-ipmi0 oss01-ipmi # Node Entry for node: oss02 (ID=74) 10.0.40.42 
lake2 oss02 oss02-eth0 10.0.42.42 lake2-ib oss02-ib0 - 10.0.40.143 lake2-ipmi oss02-ipmi0 + 10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi # Node Entry for node: xeon01 (ID=15) 10.0.40.1 owl1 xeon01 xeon01-eth0 10.0.42.1 owl1-ib xeon01-ib0 - 10.0.40.101 owl1-ipmi xeon01-ipmi0 + 10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi # Node Entry for node: xeon02 (ID=16) 10.0.40.2 owl2 xeon02 xeon02-eth0 10.0.42.2 owl2-ib xeon02-ib0 - 10.0.40.102 owl2-ipmi xeon02-ipmi0 + 10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi # Node Entry for node: xeon03 (ID=17) 10.0.40.3 xeon03 xeon03-eth0 10.0.42.3 xeon03-ib0 - 10.0.40.103 xeon03-ipmi0 + 10.0.40.103 xeon03-ipmi0 xeon03-ipmi # Node Entry for node: xeon04 (ID=18) 10.0.40.4 xeon04 xeon04-eth0 10.0.42.4 xeon04-ib0 - 10.0.40.104 xeon04-ipmi0 + 10.0.40.104 xeon04-ipmi0 xeon04-ipmi # Node Entry for node: xeon05 (ID=19) 10.0.40.5 koro xeon05 xeon05-eth0 @@ -74,17 +74,17 @@ # Node Entry for node: xeon06 (ID=20) 10.0.40.6 xeon06 xeon06-eth0 10.0.42.6 xeon06-ib0 - 10.0.40.106 xeon06-ipmi0 + 10.0.40.106 xeon06-ipmi0 xeon06-ipmi # Node Entry for node: xeon07 (ID=21) 10.0.40.7 hut xeon07 xeon07-eth0 10.0.42.7 hut-ib xeon07-ib0 - 10.0.40.107 hut-ipmi xeon07-ipmi0 + 10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi # Node Entry for node: xeon08 (ID=22) 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 eudy-ib xeon08-ib0 - 10.0.40.108 eudy-ipmi xeon08-ipmi0 + 10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi # fox 10.0.40.26 fox diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index ccfb575..ee4983a 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -208,7 +208,7 @@ # Sets the "instance" label with the remote host we are querying source_labels = [ "__param_target" ]; separator = ";"; - regex = "(.*)"; + regex = "(.*)-ipmi"; # Remove "-ipmi" at the end target_label = "instance"; replacement = "\${1}"; action = "replace"; @@ -257,7 +257,7 @@ { targets = [ "127.0.0.1:9290" ]; } ]; params = { - target = [ "10.0.40.126" ]; + target = [
"fox-ipmi" ]; module = [ "fox" ]; }; } diff --git a/m/hut/targets.yml b/m/hut/targets.yml index 8924735..c04f864 100644 --- a/m/hut/targets.yml +++ b/m/hut/targets.yml @@ -1,15 +1,15 @@ - targets: - - 10.0.40.101 - - 10.0.40.102 - - 10.0.40.103 - - 10.0.40.104 - - 10.0.40.105 - - 10.0.40.106 - - 10.0.40.107 - - 10.0.40.108 + - owl1-ipmi + - owl2-ipmi + - xeon03-ipmi + - xeon04-ipmi + - koro-ipmi + - xeon06-ipmi + - hut-ipmi + - eudy-ipmi # Storage - - 10.0.40.141 - - 10.0.40.142 - - 10.0.40.143 + - bay-ipmi + - oss01-ipmi + - lake2-ipmi labels: job: ipmi-lan -- 2.49.0 From f887dacdea1ae22c8bd695517ce05f008d349fe1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 15:02:18 +0100 Subject: [PATCH 283/472] Exclude fox from being suspended by slurm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index d745310..4f0c88a 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -78,7 +78,7 @@ in { SuspendTimeout=60 ResumeProgram=${resumeProgram} ResumeTimeout=300 - SuspendExcNodes=hut + SuspendExcNodes=hut,fox # Turn the nodes off after 1 hour of inactivity SuspendTime=3600 -- 2.49.0 From 71164400d4280e29369a770ac23f92aa1764d65a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 15:49:55 +0100 Subject: [PATCH 284/472] Mount NVME disks in /nvme{0,1} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 38d69be..60ab5b0 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -35,4 +35,25 @@ nixpkgs.config.allowUnfree = true; nixpkgs.config.nvidia.acceptLicense = true; 
services.xserver.videoDrivers = [ "nvidia" ]; + + # Mount NVME disks + fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; + fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; }; + + # Make a /nvme{0,1}/$USER directory for each user. + systemd.services.create-nvme-dirs = let + # Take only normal users in fox + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "local-fs.target" ]; + after = [ "local-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; } -- 2.49.0 From 163434af09e11f84c8335bdb58952bbfe26f0ec2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 16:39:51 +0100 Subject: [PATCH 285/472] Add dalvare1 user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 40e87a1..ef8d7d2 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -113,6 +113,19 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" ]; }; + + dalvare1 = { + uid = 2758; + isNormalUser = true; + home = "/home/Computational/dalvare1"; + description = "David Álvarez"; + group = "Computational"; + hosts = [ "hut" ]; + hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" + ]; + }; }; groups = { -- 2.49.0 From 
fe16ea373ff0abfe9a20eaf637653f1cce484a6c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 12 Feb 2025 16:46:56 +0100 Subject: [PATCH 286/472] Add users to fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index ef8d7d2..6d93285 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -68,7 +68,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "hut" "raccoon" ]; + hosts = [ "hut" "raccoon" "fox" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -120,7 +120,7 @@ home = "/home/Computational/dalvare1"; description = "David Álvarez"; group = "Computational"; - hosts = [ "hut" ]; + hosts = [ "hut" "fox" ]; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" -- 2.49.0 From 5487a93972e1e6d9f7baa3de7c99d00dac4a8390 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 13 Feb 2025 14:47:38 +0100 Subject: [PATCH 287/472] Reject SSH connections without SLURM allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 16 ++++++++++++++++ m/module/slurm-client.nix | 20 ++++++++++++++++++++ pkgs/overlay.nix | 12 ++++++++++++ 3 files changed, 48 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 60ab5b0..97ac686 100644 --- 
a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -56,4 +56,20 @@ wantedBy = [ "multi-user.target" ]; serviceConfig.ExecStart = script; }; + + # Only allow SSH connections from users who have a SLURM allocation + # See: https://slurm.schedmd.com/pam_slurm_adopt.html + security.pam.services.sshd.rules.account.slurm = { + control = "required"; + enable = true; + modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so"; + args = [ "log_level=debug5" ]; + order = 999999; # Make it the last one + }; + + # Disable systemd session (pam_systemd.so) as it will conflict with the + # pam_slurm_adopt.so module. What happens is that the shell is first adopted + # into the slurmstepd task and then into the systemd session, which is not + # what we want, otherwise it will linger even if all jobs are gone. + security.pam.services.sshd.startSession = lib.mkForce false; } diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 4f0c88a..d7cb070 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -93,9 +93,29 @@ in { # Ignore memory constraints and only use unused cores to share a node with # other jobs. SelectTypeParameters=CR_Core + + # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html + # This sets up the "extern" step into which ssh-launched processes will be + # adopted. Alloc runs the prolog at job allocation (salloc) rather than + # when a task runs (srun) so we can ssh early.
+ PrologFlags=Alloc,Contain,X11 + + # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes + # adopted by the external step, similar to tasks running in regular steps + # LaunchParameters=ulimit_pam_adopt + SlurmdDebug=debug5 + #DebugFlags=Protocol,Cgroup + ''; + + extraCgroupConfig = '' + CgroupPlugin=cgroup/v2 + #ConstrainCores=yes ''; }; + # Place the slurm config in /etc as this will be required by PAM + environment.etc.slurm.source = config.services.slurm.etcSlurm; + age.secrets.mungeKey = { file = ../../secrets/munge-key.age; owner = "munge"; diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 5977ab0..2eb9229 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -39,6 +39,18 @@ final: prev: # See https://bugs.schedmd.com/show_bug.cgi?id=19324 ./slurm-rank-expansion.patch ]; + # Install also the pam_slurm_adopt library to restrict users from accessing + # nodes with no job allocated. + postBuild = (old.postBuild or "") + '' + pushd contribs/pam_slurm_adopt + make "PAM_DIR=$out/lib/security" + popd + ''; + postInstall = (old.postInstall or "") + '' + pushd contribs/pam_slurm_adopt + make "PAM_DIR=$out/lib/security" install + popd + ''; }); prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; -- 2.49.0 From 9fd35a9ce498815ea52d5cdcc3935563030cbd62 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 14 Feb 2025 16:36:57 +0100 Subject: [PATCH 288/472] Don't move doc in web output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 4e77eb2..ef7c416 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -12,6 +12,8 @@ let installPhase = '' cp -r public $out ''; + # Don't mess doc/ + dontFixup = true; }; in { -- 2.49.0 From 5c549faaa850f8a99a002582ce442983660a007a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 25 Feb 2025 14:33:11 
+0100 Subject: [PATCH 289/472] Add abonerib user to fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 6d93285..ba2e604 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -81,7 +81,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "raccoon" ]; + hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" -- 2.49.0 From 7c55d10ceb3494cf003f3622a837f14101b49611 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Mar 2025 11:04:19 +0100 Subject: [PATCH 290/472] Adjust fox slurm config after disabling SMT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index d7cb070..7acbf65 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -43,7 +43,7 @@ in { clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" - "fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=2 Feature=fox" + "fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; -- 2.49.0 From 18f25307abce5961ddef7674e2518b29c63af60c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 28 Mar 2025 11:53:33 +0100 Subject: [PATCH 291/472] Add varcila user to hut and fox MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index ba2e604..229be8c 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -126,6 +126,19 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" ]; }; + + varcila = { + uid = 5650; + isNormalUser = true; + home = "/home/Computational/varcila"; + description = "Vincent Arcila"; + group = "Computational"; + hosts = [ "hut" "fox" ]; + hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" + ]; + }; }; groups = { -- 2.49.0 From 2c7211ffa3deb73558bc61ef977ee876a1fd23b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 7 Apr 2025 16:17:32 +0200 Subject: [PATCH 292/472] Remove SLURM partition all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer have homogeneous nodes so it doesn't make much sense to allocate a mix of them. Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 7acbf65..46478a8 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -50,7 +50,6 @@ in { partitionName = [ "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" "fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" - "all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" ]; # See slurm.conf(5) for more details about these options. 
-- 2.49.0 From 3f4b4fb810440cefef68d60023b8d34af0e3bf7b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 24 Jan 2025 13:00:54 +0100 Subject: [PATCH 293/472] Add new GitLab runner for gitlab.bsc.es MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It uses docker based on alpine and the host nix store, so we can perform builds but isolate them from the system. Reviewed-by: Aleix Boné --- m/hut/gitlab-runner.nix | 81 ++++++++++++++++++++++++++--- secrets/gitlab-bsc-docker-token.age | 11 ++++ secrets/secrets.nix | 1 + 3 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 secrets/gitlab-bsc-docker-token.age diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 226099b..9241e5d 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -1,8 +1,9 @@ { pkgs, lib, config, ... }: { - age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age; - age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age; + age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age; + age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age; + age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age; services.gitlab-runner = { enable = true; @@ -21,21 +22,89 @@ "--docker-network-mode host" ]; environmentVariables = { - https_proxy = "http://localhost:23080"; - http_proxy = "http://localhost:23080"; + https_proxy = "http://hut:23080"; + http_proxy = "http://hut:23080"; }; }; in { # For pm.bsc.es/gitlab gitlab-pm-shell = common-shell // { - authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path; + authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path; }; gitlab-pm-docker = common-docker // { - authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path; + authenticationTokenConfigFile = 
config.age.secrets.gitlab-pm-docker.path; + }; + + gitlab-bsc-docker = { + # gitlab.bsc.es still uses the old token mechanism + registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path; + tagList = [ "docker" "hut" ]; + environmentVariables = { + # We cannot access the hut local interface from docker, so we connect + # to hut directly via the ethernet one. + https_proxy = "http://hut:23080"; + http_proxy = "http://hut:23080"; + }; + executor = "docker"; + dockerImage = "alpine"; + dockerVolumes = [ + "/nix/store:/nix/store:ro" + "/nix/var/nix/db:/nix/var/nix/db:ro" + "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" + ]; + dockerExtraHosts = [ + # Required to pass the proxy via hut + "hut:10.0.40.7" + ]; + dockerDisableCache = true; + registrationFlags = [ + # Increase build log length to 64 MiB + "--output-limit 65536" + ]; + preBuildScript = pkgs.writeScript "setup-container" '' + mkdir -p -m 0755 /nix/var/log/nix/drvs + mkdir -p -m 0755 /nix/var/nix/gcroots + mkdir -p -m 0755 /nix/var/nix/profiles + mkdir -p -m 0755 /nix/var/nix/temproots + mkdir -p -m 0755 /nix/var/nix/userpool + mkdir -p -m 1777 /nix/var/nix/gcroots/per-user + mkdir -p -m 1777 /nix/var/nix/profiles/per-user + mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root + mkdir -p -m 0700 "$HOME/.nix-defexpr" + mkdir -p -m 0700 "$HOME/.ssh" + cat > "$HOME/.ssh/config" << EOF + Host bscpm04.bsc.es gitlab-internal.bsc.es + User git + ProxyCommand nc -X connect -x hut:23080 %h %p + Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es + ProxyCommand nc -X connect -x hut:23080 %h %p + EOF + cat >> "$HOME/.ssh/known_hosts" << EOF + bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT + gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 + EOF + . ${pkgs.nix}/etc/profile.d/nix-daemon.sh + # Required to load SSL certificate paths + . 
${pkgs.cacert}/nix-support/setup-hook + ''; + environmentVariables = { + ENV = "/etc/profile"; + USER = "root"; + NIX_REMOTE = "daemon"; + PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; + }; }; }; }; + # DOCKER* chains are useless, override at FORWARD + networking.firewall.extraCommands = '' + # Allow docker to use our proxy + iptables -I FORWARD 1 -p tcp -i docker0 -d hut --dport 23080 -j nixos-fw-accept + # Block anything else coming from docker + iptables -I FORWARD 2 -p all -i docker0 -j nixos-fw-log-refuse + ''; + #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age new file mode 100644 index 0000000..74b83e0 --- /dev/null +++ b/secrets/gitlab-bsc-docker-token.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg WSdjyQPzBJ4JbzQpGeq1AAYpWKoXmLI1ZtmNmM5QOzs +qGDlDT31DQF1DdHen0+5+52DdsQlabJdA2pOB5O1I6g +-> ssh-ed25519 CAWG4Q wioWMDxQjN+d4JdIbCwZg0DLQu1OH2mV6gukRprjuAs +670fE61hidOEh20hHiQAhP0+CjDF0WMBNzgwkGT8Yqg +-> ssh-ed25519 MSF3dg DN19uvAEtqq4708P6HpuX9i/o/qAvHX6dj69dCF2H1o +4Lu9GnjiFLMeXJ2C7aVPJsCHCQVlhylNWJi896Av92s +--- 7cKBwOYNOUZ2h3/kAY09aSMASZSxX7hZIT4kvlIiT6w +6fQF5=bX+v e`7/A~PѦ7 +A)h=oZ$ ^V0/܅r +kubĶ:R>^gik_*% a7KG&PIn \ No newline at end of file diff --git a/secrets/secrets.nix b/secrets/secrets.nix index ac3eac3..58ec2d6 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -9,6 +9,7 @@ in "gitea-runner-token.age".publicKeys = hut; "gitlab-runner-docker-token.age".publicKeys = hut; "gitlab-runner-shell-token.age".publicKeys = hut; + "gitlab-bsc-docker-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; "jungle-robot-password.age".publicKeys = hut; "ipmi.yml.age".publicKeys = hut; -- 2.49.0 From fdac196c6c4227b4e747cf80870ec895451ea036 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 26 Feb 2025 15:31:05 +0100 Subject: [PATCH 294/472] Fix nginx /cache regex `nix-serve` does not handle duplicates in the path: ``` hut$ curl http://127.0.0.1:5000/nix-cache-info StoreDir: /nix/store WantMassQuery: 1 Priority: 30 hut$ curl http://127.0.0.1:5000//nix-cache-info File not found. ``` This meant that the cache was not accessible via: `curl https://jungle.bsc.es/cache/nix-cache-info` but `curl https://jungle.bsc.es/cachenix-cache-info` worked. Reviewed-by: Rodrigo Arias Mallo --- m/hut/nginx.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index ef7c416..696ce01 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -40,7 +40,7 @@ in proxy_redirect http:// $scheme://; } location /cache { - rewrite ^/cache(.*) /$1 break; + rewrite ^/cache/(.*) /$1 break; proxy_pass http://127.0.0.1:5000; proxy_redirect http:// $scheme://; } -- 2.49.0 From 129273e8d8ba0dedda2398db2abf7032e1570382 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Apr 2025 10:03:05 +0200 Subject: [PATCH 295/472] Make nginx listen on all interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needed for local hosts to contact the nix cache via HTTP directly. We also allow the incoming traffic on port 80. 
Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 696ce01..97afc3a 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -17,13 +17,14 @@ let }; in { + networking.firewall.allowedTCPPorts = [ 80 ]; services.nginx = { enable = true; virtualHosts."jungle.bsc.es" = { root = "${website}"; listen = [ { - addr = "127.0.0.1"; + addr = "0.0.0.0"; port = 80; } ]; -- 2.49.0 From 89c65ea578ce7ddcb95cf9aad0c088478946621f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Apr 2025 10:23:26 +0200 Subject: [PATCH 296/472] Clean all iptables rules on stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents the "iptables: Chain already exists." error by making sure that we don't leave any chain on start. The ideal solution is to use iptables-restore instead, which will do the right job. But this needs to be changed in NixOS entirely. Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 66cc881..b00351d 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -56,6 +56,11 @@ iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept ''; + # Flush all rules and chains on stop so it won't break on start + extraStopCommands = '' + iptables -F + iptables -X + ''; }; }; -- 2.49.0 From 55b71d690192d3c7d3dc78e86c1763fc02b99542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 26 Feb 2025 16:03:26 +0100 Subject: [PATCH 297/472] Use hut nix cache in owl1, owl2 and raccoon For owl1 and owl2 directly connect to hut via LAN with HTTP, but for raccoon pass via the proxy using jungle.bsc.es with HTTPS. There is no risk of tampering as packages are signed. 
Reviewed-by: Rodrigo Arias Mallo --- m/common/xeon/net.nix | 2 +- m/module/hut-substituter.nix | 10 ++++++++++ m/owl1/configuration.nix | 1 + m/owl2/configuration.nix | 1 + m/raccoon/configuration.nix | 5 +++++ 5 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 m/module/hut-substituter.nix diff --git a/m/common/xeon/net.nix b/m/common/xeon/net.nix index 965bc1f..09e83ed 100644 --- a/m/common/xeon/net.nix +++ b/m/common/xeon/net.nix @@ -11,7 +11,7 @@ proxy = { default = "http://hut:23080/"; - noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40"; + noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut"; # Don't set all_proxy as go complains and breaks the gitlab runner, see: # https://github.com/golang/go/issues/16715 allProxy = null; diff --git a/m/module/hut-substituter.nix b/m/module/hut-substituter.nix new file mode 100644 index 0000000..9b871f6 --- /dev/null +++ b/m/module/hut-substituter.nix @@ -0,0 +1,10 @@ +{ config, ... }: +{ + nix.settings = + # Don't add hut as a cache to itself + assert config.networking.hostName != "hut"; + { + substituters = [ "http://hut/cache" ]; + trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + }; +} diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 7fc4a8f..20e3cf5 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -8,6 +8,7 @@ ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/debuginfod.nix + ../module/hut-substituter.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 3ea9413..54849f6 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -8,6 +8,7 @@ ../module/slurm-client.nix ../module/slurm-firewall.nix ../module/debuginfod.nix + ../module/hut-substituter.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index be9b10c..dd15a82 
100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -25,6 +25,11 @@ } ]; }; + nix.settings = { + substituters = [ "https://jungle.bsc.es/cache" ]; + trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + }; + # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.graphics.enable = true; -- 2.49.0 From d84645f3e102af4b52b9aaed555dce023ce3f3cf Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Apr 2025 12:15:33 +0200 Subject: [PATCH 298/472] Add bscpm04.bsc.es SSH host and public key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows fetching repositories from hut and other machines in jungle without the need to do any extra configuration. Reviewed-by: Aleix Boné --- m/common/base/ssh.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/m/common/base/ssh.nix b/m/common/base/ssh.nix index 7d7b939..b531076 100644 --- a/m/common/base/ssh.nix +++ b/m/common/base/ssh.nix @@ -10,7 +10,7 @@ in # Connect to intranet git hosts via proxy programs.ssh.extraConfig = '' - Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es + Host bscpm02.bsc.es bscpm03.bsc.es bscpm04.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es User git ProxyCommand nc -X connect -x hut:23080 %h %p @@ -22,6 +22,7 @@ in programs.ssh.knownHosts = hostsKeys // { "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; + "bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT"; "glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; "glogin2.bsc.es".publicKey = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; }; -- 2.49.0 From 93f8d3aa8902d797c0a32597dc53fd31f5d8e55f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Apr 2025 12:17:00 +0200 Subject: [PATCH 299/472] Allow traffic from docker to enter port 23080 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: hut% sudo docker run -it --rm alpine /bin/ash -xc 'true | nc -w 3 -v 10.0.40.7 23080' + true + nc -w 3 -v 10.0.40.7 23080 nc: 10.0.40.7 (10.0.40.7:23080): Operation timed out After: hut% sudo docker run -it --rm alpine /bin/ash -xc 'true | nc -w 3 -v 10.0.40.7 23080' + true + nc -w 3 -v 10.0.40.7 23080 10.0.40.7 (10.0.40.7:23080) open Fixes: https://jungle.bsc.es/git/rarias/jungle/issues/94 Reviewed-by: Aleix Boné --- m/hut/gitlab-runner.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 9241e5d..a68d8d1 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -97,12 +97,14 @@ }; }; - # DOCKER* chains are useless, override at FORWARD + # DOCKER* chains are useless, override at FORWARD and nixos-fw networking.firewall.extraCommands = '' # Allow docker to use our proxy iptables -I FORWARD 1 -p tcp -i docker0 -d hut --dport 23080 -j nixos-fw-accept # Block anything else coming from docker iptables -I FORWARD 2 -p all -i docker0 -j nixos-fw-log-refuse + # Allow incoming traffic from docker to 23080 + iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT ''; #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; -- 2.49.0 From d0f151595f2bf4b666163fd8e57f51841f61859c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Apr 2025 12:46:08 +0200 Subject: [PATCH 300/472] Don't forward any docker traffic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to the 23080 local port will be done by applying the 
INPUT rules, which pass through nixos-fw. Reviewed-by: Aleix Boné --- m/hut/gitlab-runner.nix | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index a68d8d1..2fe7c1c 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -99,10 +99,9 @@ # DOCKER* chains are useless, override at FORWARD and nixos-fw networking.firewall.extraCommands = '' - # Allow docker to use our proxy - iptables -I FORWARD 1 -p tcp -i docker0 -d hut --dport 23080 -j nixos-fw-accept - # Block anything else coming from docker - iptables -I FORWARD 2 -p all -i docker0 -j nixos-fw-log-refuse + # Don't forward any traffic from docker + iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse + # Allow incoming traffic from docker to 23080 iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT ''; -- 2.49.0 From 80309d107b341d29e6ecae5764de615403d3dda7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Apr 2025 11:20:57 +0200 Subject: [PATCH 301/472] Increase data retention to 5 years MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have more space, we can extend the retention time to 5 years to hold the monitoring metrics. For a year we have: # du -sh /var/lib/prometheus2 13G /var/lib/prometheus2 So we can expect it to increase to about 65 GiB. In the future we may want to reduce some acquisition frequency. 
Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index ee4983a..e6d4432 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -46,7 +46,7 @@ services.prometheus = { enable = true; port = 9001; - retentionTime = "1y"; + retentionTime = "5y"; listenAddress = "127.0.0.1"; }; -- 2.49.0 From 92eacfad2015d98f81b006b6af2e8d443178cba9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Apr 2025 11:41:43 +0200 Subject: [PATCH 302/472] Add raccoon node exporter monitoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index e6d4432..d4dc1e1 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -250,6 +250,14 @@ module = [ "raccoon" ]; }; } + { + job_name = "raccoon"; + static_configs = [ + { + targets = [ "127.0.0.1:19002" ]; # Node exporter + } + ]; + } { job_name = "ipmi-fox"; metrics_path = "/ipmi"; -- 2.49.0 From 5208a3483bc03185d87928100dcbc03e9951f78e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Apr 2025 16:16:42 +0200 Subject: [PATCH 303/472] Set keep-outputs to true in all machines From the documentation of keep-outputs, setting it to true would prevent the GC from removing build time dependencies: If true, the garbage collector will keep the outputs of non-garbage derivations. If false (default), outputs will be deleted unless they are GC roots themselves (or reachable from other roots). In general, outputs must be registered as roots separately. However, even if the output of a derivation is registered as a root, the collector will still delete store paths that are used only at build time (e.g., the C compiler, or source tarballs downloaded from the network). To prevent it from doing so, set this option to true. 
See: https://nix.dev/manual/nix/2.24/command-ref/conf-file.html#conf-keep-outputs Reviewed-by: Aleix Roca Nonell --- m/common/base/nix.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix index dc79647..5eee5b7 100644 --- a/m/common/base/nix.nix +++ b/m/common/base/nix.nix @@ -23,6 +23,7 @@ trusted-users = [ "@wheel" ]; flake-registry = pkgs.writeText "global-registry.json" ''{"flakes":[],"version":2}''; + keep-outputs = true; }; gc = { -- 2.49.0 From f4229e34f67d8c99a5e11ebd68bf21452df343d8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Apr 2025 23:51:06 +0200 Subject: [PATCH 304/472] Add custom nix-daemon exporter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to see which derivations are being built in realtime. It is a bit of a hack, but it seems to work. We simply look at the environment of the child processes of nix-daemon (usually bash) and then look for the $name variable which should hold the current derivation being built. Needs root to be able to read the environ file of the different nix-daemon processes as they are owned by the nixbld* users. 
See: https://discourse.nixos.org/t/query-ongoing-builds/23486 Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 2 ++ m/hut/nix-daemon-builds.sh | 26 ++++++++++++++++++++++++++ m/hut/nix-daemon-exporter.nix | 23 +++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100755 m/hut/nix-daemon-builds.sh create mode 100644 m/hut/nix-daemon-exporter.nix diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index d4dc1e1..4f1352e 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -4,6 +4,7 @@ imports = [ ../module/slurm-exporter.nix ./gpfs-probe.nix + ./nix-daemon-exporter.nix ]; age.secrets.grafanaJungleRobotPassword = { @@ -108,6 +109,7 @@ "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" "127.0.0.1:9341" # Slurm exporter "127.0.0.1:9966" # GPFS custom exporter + "127.0.0.1:9999" # Nix-daemon custom exporter "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }]; diff --git a/m/hut/nix-daemon-builds.sh b/m/hut/nix-daemon-builds.sh new file mode 100755 index 0000000..79ab65c --- /dev/null +++ b/m/hut/nix-daemon-builds.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +# Locate nix daemon pid +nd=$(pgrep -o nix-daemon) + +# Locate children of nix-daemon +pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children") + +# For each children, locate 2nd level children +pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n') + +cat </dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.') + user=$(ps -o uname= -p "$pid") + if [ -n "$name" -a -n "$user" ]; then + printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name" + fi +done diff --git a/m/hut/nix-daemon-exporter.nix b/m/hut/nix-daemon-exporter.nix new file mode 100644 index 0000000..9353fe7 --- /dev/null +++ b/m/hut/nix-daemon-exporter.nix @@ -0,0 +1,23 @@ +{ pkgs, config, lib, ... 
}: +let + script = pkgs.runCommand "nix-daemon-exporter.sh" { } + '' + cp ${./nix-daemon-builds.sh} $out; + chmod +x $out + '' + ; +in +{ + systemd.services.nix-daemon-exporter = { + description = "Daemon to export nix-daemon metrics"; + path = [ pkgs.procps pkgs.ripgrep ]; + wantedBy = [ "default.target" ]; + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}"; + # Needed root to read the environment, potentially unsafe + User = "root"; + Group = "root"; + }; + }; +} -- 2.49.0 From 4048b3327a3564993fe26869ca4fd8cf34c43728 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 May 2025 15:40:09 +0200 Subject: [PATCH 305/472] Add meteocat exporter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to track ambient temperature changes and estimate the temperature delta between the server room and exterior temperature. We should be able to predict when we would need to stop the machines due to excessive temperature as summer approaches. 
Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 2 + m/module/meteocat-exporter.nix | 17 ++++++++ pkgs/meteocat-exporter/default.nix | 25 +++++++++++ pkgs/meteocat-exporter/meteocat-exporter | 54 ++++++++++++++++++++++++ pkgs/meteocat-exporter/setup.py | 11 +++++ pkgs/overlay.nix | 1 + 6 files changed, 110 insertions(+) create mode 100644 m/module/meteocat-exporter.nix create mode 100644 pkgs/meteocat-exporter/default.nix create mode 100644 pkgs/meteocat-exporter/meteocat-exporter create mode 100644 pkgs/meteocat-exporter/setup.py diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 4f1352e..8f1bea4 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -3,6 +3,7 @@ { imports = [ ../module/slurm-exporter.nix + ../module/meteocat-exporter.nix ./gpfs-probe.nix ./nix-daemon-exporter.nix ]; @@ -110,6 +111,7 @@ "127.0.0.1:9341" # Slurm exporter "127.0.0.1:9966" # GPFS custom exporter "127.0.0.1:9999" # Nix-daemon custom exporter + "127.0.0.1:9929" # Meteocat custom exporter "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }]; diff --git a/m/module/meteocat-exporter.nix b/m/module/meteocat-exporter.nix new file mode 100644 index 0000000..ffc0338 --- /dev/null +++ b/m/module/meteocat-exporter.nix @@ -0,0 +1,17 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +{ + systemd.services."prometheus-meteocat-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter"; + }; + }; +} diff --git a/pkgs/meteocat-exporter/default.nix b/pkgs/meteocat-exporter/default.nix new file mode 100644 index 0000000..5bc4f09 --- /dev/null +++ b/pkgs/meteocat-exporter/default.nix @@ -0,0 +1,25 @@ +{ python3Packages, lib }: + +python3Packages.buildPythonApplication rec { + pname = "meteocat-exporter"; + version = "1.0"; + + src = ./.; + + doCheck = false; + + build-system = with python3Packages; [ + setuptools + ]; + + dependencies = with python3Packages; [ + beautifulsoup4 + lxml + prometheus-client + ]; + + meta = with lib; { + description = "MeteoCat Prometheus Exporter"; + platforms = platforms.linux; + }; +} diff --git a/pkgs/meteocat-exporter/meteocat-exporter b/pkgs/meteocat-exporter/meteocat-exporter new file mode 100644 index 0000000..acc9f3e --- /dev/null +++ b/pkgs/meteocat-exporter/meteocat-exporter @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import time +from prometheus_client import start_http_server, Gauge +from bs4 import BeautifulSoup +from urllib import request + +# Configuration ------------------------------------------- +meteo_station = "X8" # Barcelona - Zona Universitària +listening_port = 9929 +update_period = 60 * 5 # Each 5 min +# --------------------------------------------------------- + +metric_tmin = Gauge('meteocat_temp_min', 'Min temperature') +metric_tmax = Gauge('meteocat_temp_max', 'Max temperature') +metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature') +metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation') + +def update(st): + url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st + response = request.urlopen(url) + data = 
response.read() + soup = BeautifulSoup(data, 'lxml') + table = soup.find("table", {"class" : "tblperiode"}) + rows = table.find_all('tr') + row = rows[-1] # Take the last row + row_data = [] + header = row.find('th') + header_text = header.text.strip() + row_data.append(header_text) + for col in row.find_all('td'): + row_data.append(col.text) + try: + # Sometimes it will return '(s/d)' and fail to parse + metric_tavg.set(float(row_data[1])) + metric_tmax.set(float(row_data[2])) + metric_tmin.set(float(row_data[3])) + metric_srad.set(float(row_data[10])) + #print("ok: temp_avg={}".format(float(row_data[1]))) + except: + print("cannot parse row: {}".format(row)) + metric_tavg.set(float("nan")) + metric_tmax.set(float("nan")) + metric_tmin.set(float("nan")) + metric_srad.set(float("nan")) + +if __name__ == '__main__': + start_http_server(port=listening_port, addr="localhost") + while True: + try: + update(meteo_station) + except: + print("update failed") + time.sleep(update_period) diff --git a/pkgs/meteocat-exporter/setup.py b/pkgs/meteocat-exporter/setup.py new file mode 100644 index 0000000..9cc74d3 --- /dev/null +++ b/pkgs/meteocat-exporter/setup.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='meteocat-exporter', + version='1.0', + # Modules to import from other scripts: + packages=find_packages(), + # Executables + scripts=["meteocat-exporter"], + ) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 2eb9229..995ceeb 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -54,4 +54,5 @@ final: prev: }); prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; + meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; } -- 2.49.0 From dd15f9c943112082560806847526ea7d0f580880 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 11:24:12 +0200 Subject: [PATCH 306/472] Add UPC temperature sensor monitoring MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit These sensors are part of their air quality measurements, which just happen to be very close to our server room. Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 2 + m/module/upc-qaire-exporter.nix | 17 +++++ pkgs/overlay.nix | 1 + pkgs/upc-qaire-exporter/default.nix | 24 +++++++ pkgs/upc-qaire-exporter/setup.py | 11 ++++ pkgs/upc-qaire-exporter/upc-qaire-exporter | 74 ++++++++++++++++++++++ 6 files changed, 129 insertions(+) create mode 100644 m/module/upc-qaire-exporter.nix create mode 100644 pkgs/upc-qaire-exporter/default.nix create mode 100644 pkgs/upc-qaire-exporter/setup.py create mode 100644 pkgs/upc-qaire-exporter/upc-qaire-exporter diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 8f1bea4..7042c91 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -4,6 +4,7 @@ imports = [ ../module/slurm-exporter.nix ../module/meteocat-exporter.nix + ../module/upc-qaire-exporter.nix ./gpfs-probe.nix ./nix-daemon-exporter.nix ]; @@ -112,6 +113,7 @@ "127.0.0.1:9966" # GPFS custom exporter "127.0.0.1:9999" # Nix-daemon custom exporter "127.0.0.1:9929" # Meteocat custom exporter + "127.0.0.1:9928" # UPC Qaire custom exporter "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }]; diff --git a/m/module/upc-qaire-exporter.nix b/m/module/upc-qaire-exporter.nix new file mode 100644 index 0000000..ddb27eb --- /dev/null +++ b/m/module/upc-qaire-exporter.nix @@ -0,0 +1,17 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +{ + systemd.services."prometheus-upc-qaire-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter"; + }; + }; +} diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 995ceeb..2a1df4a 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -55,4 +55,5 @@ final: prev: prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; + upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; } diff --git a/pkgs/upc-qaire-exporter/default.nix b/pkgs/upc-qaire-exporter/default.nix new file mode 100644 index 0000000..b5c14cb --- /dev/null +++ b/pkgs/upc-qaire-exporter/default.nix @@ -0,0 +1,24 @@ +{ python3Packages, lib }: + +python3Packages.buildPythonApplication rec { + pname = "upc-qaire-exporter"; + version = "1.0"; + + src = ./.; + + doCheck = false; + + build-system = with python3Packages; [ + setuptools + ]; + + dependencies = with python3Packages; [ + prometheus-client + requests + ]; + + meta = with lib; { + description = "UPC Qaire Prometheus Exporter"; + platforms = platforms.linux; + }; +} diff --git a/pkgs/upc-qaire-exporter/setup.py b/pkgs/upc-qaire-exporter/setup.py new file mode 100644 index 0000000..e2238a7 --- /dev/null +++ b/pkgs/upc-qaire-exporter/setup.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='upc-qaire-exporter', + version='1.0', + # Modules to import from other scripts: + packages=find_packages(), + # Executables + scripts=["upc-qaire-exporter"], + ) diff --git a/pkgs/upc-qaire-exporter/upc-qaire-exporter b/pkgs/upc-qaire-exporter/upc-qaire-exporter new file mode 100644 index 0000000..39697aa --- /dev/null +++ 
b/pkgs/upc-qaire-exporter/upc-qaire-exporter @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +import time +from prometheus_client import start_http_server, Gauge +import requests, json +from datetime import datetime, timedelta + +# Configuration ------------------------------------------- +listening_port = 9928 +update_period = 60 * 5 # Each 5 min +# --------------------------------------------------------- + +metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor') + +def genparams(): + d = {} + d['topic'] = 'TEMPERATURE' + d['shift_dates_to'] = '' + d['datapoints'] = 301 + d['devicesAndColors'] = '1148418@@@#40ACB6' + + now = datetime.now() + + d['fromDate'] = now.strftime('%d/%m/%Y') + d['toDate'] = now.strftime('%d/%m/%Y') + d['serviceFrequency'] = 'NONE' + + # WTF! + for i in range(7): + for j in range(48): + key = 'week.days[{}].hours[{}].value'.format(i, j) + d[key] = 'OPEN' + + return d + +def measure(): + # First we need to load session + s = requests.Session() + r = s.get("https://upc.edu/sirena") + if r.status_code != 200: + print("bad HTTP status code on new session: {}".format(r.status_code)) + return + + if s.cookies.get("JSESSIONID") is None: + print("cannot get JSESSIONID") + return + + # Now we can pull the data + url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json" + r = s.post(url, data=genparams()) + + if r.status_code != 200: + print("bad HTTP status code on data: {}".format(r.status_code)) + return + + #print(r.text) + j = json.loads(r.content) + + # Just take the last one + last = j['data']['chartElementList'][-1] + temp = last['values']['1148418-Temperatura'] + + return temp + +if __name__ == '__main__': + start_http_server(port=listening_port, addr="localhost") + while True: + try: + metric_temp.set(measure()) + except: + print("measure failed") + metric_temp.set(float("nan")) + + time.sleep(update_period) -- 2.49.0 From 1eac0fcad8211195499bc566e6c70312b31af700 Mon Sep 17 00:00:00 2001 From: Rodrigo 
Arias Mallo Date: Mon, 26 May 2025 11:40:07 +0200 Subject: [PATCH 307/472] Remove pam_slurm_adopt from fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer will be able to use SLURM from jungle. Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 97ac686..60ab5b0 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -56,20 +56,4 @@ wantedBy = [ "multi-user.target" ]; serviceConfig.ExecStart = script; }; - - # Only allow SSH connections from users who have a SLURM allocation - # See: https://slurm.schedmd.com/pam_slurm_adopt.html - security.pam.services.sshd.rules.account.slurm = { - control = "required"; - enable = true; - modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so"; - args = [ "log_level=debug5" ]; - order = 999999; # Make it last one - }; - - # Disable systemd session (pam_systemd.so) as it will conflict with the - # pam_slurm_adopt.so module. What happens is that the shell is first adopted - # into the slurmstepd task and then into the systemd session, which is not - # what we want, otherwise it will linger even if all jobs are gone. 
- security.pam.services.sshd.startSession = lib.mkForce false; } -- 2.49.0 From 4528b7c2a6fb271e7c0136da4dcd88c14efd5d4f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 11:43:16 +0200 Subject: [PATCH 308/472] Remove fox from SLURM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 2 -- m/module/slurm-client.nix | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 60ab5b0..3a188a6 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -5,8 +5,6 @@ ../common/xeon.nix ../module/ceph.nix ../module/emulation.nix - ../module/slurm-client.nix - ../module/slurm-firewall.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 46478a8..21ae945 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -43,13 +43,11 @@ in { clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" - "fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; partitionName = [ "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" - "fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" ]; # See slurm.conf(5) for more details about these options. 
@@ -77,7 +75,7 @@ in { SuspendTimeout=60 ResumeProgram=${resumeProgram} ResumeTimeout=300 - SuspendExcNodes=hut,fox + SuspendExcNodes=hut # Turn the nodes off after 1 hour of inactivity SuspendTime=3600 -- 2.49.0 From 9bee145e2546650d0f677c814d335ffefdaee9b8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 11:50:57 +0200 Subject: [PATCH 309/472] Remove Ceph module from fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will no longer be accessible from the UPC. Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 3a188a6..e33afab 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -3,7 +3,6 @@ { imports = [ ../common/xeon.nix - ../module/ceph.nix ../module/emulation.nix ]; -- 2.49.0 From 201ff64b25b4c7cefef302f25f5f6a34ddf1c797 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 12:00:21 +0200 Subject: [PATCH 310/472] Distrust fox SSH key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer will share secrets with fox until we can regain our trust. Reviewed-by: Aleix Boné --- keys.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/keys.nix b/keys.nix index a2b8c2c..f3a6bcf 100644 --- a/keys.nix +++ b/keys.nix @@ -13,7 +13,8 @@ rec { }; hostGroup = with hosts; rec { - compute = [ owl1 owl2 fox ]; + untrusted = [ fox ]; + compute = [ owl1 owl2 ]; playground = [ eudy koro ]; storage = [ bay lake2 ]; monitor = [ hut ]; -- 2.49.0 From cd0c0704393b6a3d8f7deb024266fc7d93a2fecd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 12:27:57 +0200 Subject: [PATCH 311/472] Rotate fox SSH host key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent decrypting old secrets by reading the git history.
Reviewed-by: Aleix Boné --- keys.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keys.nix b/keys.nix index f3a6bcf..83b8ff4 100644 --- a/keys.nix +++ b/keys.nix @@ -9,7 +9,7 @@ rec { koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; - fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox"; + fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; }; hostGroup = with hosts; rec { -- 2.49.0 From a22c862192a90e833d2eb998d6f5fbc702e586cc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 12:30:03 +0200 Subject: [PATCH 312/472] Rekey all secrets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fox is no longer able to use munge or ceph, so we remove the key and rekey them. 
Reviewed-by: Aleix Boné --- secrets/ceph-user.age | Bin 1023 -> 913 bytes secrets/gitea-runner-token.age | Bin 479 -> 479 bytes secrets/gitlab-bsc-docker-token.age | 19 +++++++++---------- secrets/gitlab-runner-docker-token.age | 17 ++++++++--------- secrets/gitlab-runner-shell-token.age | 16 ++++++++-------- secrets/ipmi.yml.age | Bin 1184 -> 1184 bytes secrets/jungle-robot-password.age | Bin 477 -> 477 bytes secrets/munge-key.age | Bin 2006 -> 1896 bytes secrets/nix-serve.age | Bin 534 -> 534 bytes 9 files changed, 25 insertions(+), 27 deletions(-) diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age index 880fbbf33f23b68b298d1efcfde58e2581d46ba7..c37e0f4727564e981f6c1b8c6e404d092f3034ac 100644 GIT binary patch delta 825 zcmey*K9PNbPJM($Nou%PpkKaWaE5_Pu2-c;zKcg*a!Hw!X{c9mXl99%k84=DbCpY0 zF_%$Da;aN-RZf^sSb$Shx>s;sa709rWvWYQv1dk_pR2o9MU-Pjq@j~vK9{bYLUD11 zZfc5=si~o*LUFodVy>ftuXC|qSfQzpS87s@v!SP{ghD$_Nptpr-- zbdJzYw8$+hPAd(}3o^(|&UJMvNDs^B3d+a`sVH&xjr8<)c1lh!$j+(q&N1?I%yY~! zchAoC3Qsl4FVFQTEHleSj{#@LaCehHg-WmVqNG3aGw%05ACeX9J8XZVlKz<0MGQSvZ~-v7Z=CKPz%?BU{B95Q%kdgvPvg!|0;`O zpQuXLka9ndVsz_#gWZf%(iI}z%gYU%i=9f{4E-JRJY3AOJfcEe4UIh9qg+!;9nBpB z>XY5PoLq~|G9tMud^|FY@{JSyGV(*S{QNvTi?d6e3v#@Rw2kt!{0x)A+@gFEoif5A zER(r(b#)a=qRPEI!opolDng9B+ynhm{0x$dva`e7{Sy5P3~~~KQ!~e?v1dqVgs)F|VN|(kc5-%ZiCboIcwu6yexQGdOQBh5UZQJQ zR<@C0M81GgPPv(3hGmY)WxiqF1(}suK~X8jzPTQTQIWpcp5eaQCh3k`z8U(Z zNigsU?x#p1#IaF6GX7E~UO6KIXaZzFFSIIT>ad>E(`-;~B-n(=APk%zdLg zolJuY%Ca+yvoey+^}U@evh&K*1N?%`9P>>olM2gx%$z*A5{tu~qTCIVLj!U`%*u<3 z^MVrn%FA7iQ#`WW4J#|7vdmIV-9mzV3rk!lpJf!U_tbVa$qe^)^A9d?t*k69H#Vt^ zDk!P&^365#Ov`uA4K>%V@U}1rPsw!T3a#>p@-7Ups3=T~a5lCGH1RLbsPym+GWB;a zjLHnj^h)>eOAE;jHgonwj{)rv*Rb$F1;Yr}Aop_1ob>$Md`II-FR%Iv^YlEAFwZt7XA9T^qwrSItC6KKGttE;Qv zT3MLx6Vd<=2 z3z%OqtTA?F_9{bKJN8}z?4&*N%&W;B_t@xrnad0S4qsI$6~7ab=Y+BD@^ W)3(hrFZtAL!abK?l>NcIj28ePPC9=8 diff --git a/secrets/gitea-runner-token.age b/secrets/gitea-runner-token.age index 
31d52cf5ca7ae9694122700299a1796a6beecd3e..2b59fef1ec69d035923a75f20fa27a3545397ef5 100644 GIT binary patch delta 424 zcmcc5e4lxOPJM}cm|uyXOO#v<- z4-)kz$t8Z_ZsCDmc|L)GL3vJ!sWxn*YMUS5TnCgGv3zM<}>M)_QU6)BF9 zKKcHMo@thqJ|TXl202N7=J|$hxgq9G!Oqzhc{%w(j{2rPX1-jyy1EJ>Y0lckVTSqo zWnNC1fmxp3`boJ?m672tMajlyo<#xv#X%9lCfc4+l`gJa4|m>Tjoff(TE3x39z&wt zDj$bh*BQ6d=6=0r%IWdl_tEo-v!)*nHMN@jp-tRhsqF{ntio{Vkcun4>hE7}J(|`q U^GRI0#+BI*&k67SDA~LU0MFBx+W-In delta 424 zcmcc5e4lxOPJL=nScGY1UP`!&Pr7ABxp{GgTN z4-)kru2pFTK^BQQ?q=ZuRT+t{S&q56S^jSMZppsEfuT7m6((jWPQ?*LnI>HRjxMGi z`c+Zx8EL6ykwtD^RbE9&e%=B3x!zGO{=t@k6{aP*AqD0ZUin||9_c1FUa_O?pXCjyLE1S z9IQ1*GY^+>KYHd`Tm7b}^xUIk+q>^dZKywewl;2er`^Y7Ga0s}Od?w(v%YiwOn>1X T-gww*x|gx!&zc`KZBZ8iFqoPy diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age index 74b83e0..f379805 100644 --- a/secrets/gitlab-bsc-docker-token.age +++ b/secrets/gitlab-bsc-docker-token.age @@ -1,11 +1,10 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg WSdjyQPzBJ4JbzQpGeq1AAYpWKoXmLI1ZtmNmM5QOzs -qGDlDT31DQF1DdHen0+5+52DdsQlabJdA2pOB5O1I6g --> ssh-ed25519 CAWG4Q wioWMDxQjN+d4JdIbCwZg0DLQu1OH2mV6gukRprjuAs -670fE61hidOEh20hHiQAhP0+CjDF0WMBNzgwkGT8Yqg --> ssh-ed25519 MSF3dg DN19uvAEtqq4708P6HpuX9i/o/qAvHX6dj69dCF2H1o -4Lu9GnjiFLMeXJ2C7aVPJsCHCQVlhylNWJi896Av92s ---- 7cKBwOYNOUZ2h3/kAY09aSMASZSxX7hZIT4kvlIiT6w -6fQF5=bX+v e`7/A~PѦ7 -A)h=oZ$ ^V0/܅r -kubĶ:R>^gik_*% a7KG&PIn \ No newline at end of file +-> ssh-ed25519 HY2yRg XPOFoZqY+AnKC77jrgNqAm1ADphurfuhO4NRrfiuUDc +iCfMMpGHyaYHGy6ci8sqjUtcPeteLlyvLGEF79VPOEc +-> ssh-ed25519 CAWG4Q 6OsGrnM+/c5lTN81Rvp166K+ygmSIFeSYzXxYg25KGE +Av1zTw2zK4Gufzti9kQaye7C362GCiDRRHzCqBLR33g +-> ssh-ed25519 MSF3dg 8CHqJ7mEDvjvqbmF+eE6Em1Wi6eHAzEUpiExC1gm7S0 +bdwzYHw3RAbdHq+RsiFUP++sQ586VUlSnAzAOhiQUjI +--- gA5XSUfjUBol938sC5DbUf8PvQUIr2pNkS2nL95OF9c +Ea1G7ݩ[R\{~$GocQwKP&w6] +ѣ^z̄ 1kY2p2Knok/Xpt''$0co= \ No newline at end of file diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age 
index cd1432e..3efea55 100644 --- a/secrets/gitlab-runner-docker-token.age +++ b/secrets/gitlab-runner-docker-token.age @@ -1,10 +1,9 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A -FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8 --> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM -Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo --> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4 -CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8 ---- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8 -#H$F;%62rfX\Dn шȉx>&;cUI=M?TǸ"pxӭ\sbFWD{ -AW>?UHԳ \ No newline at end of file +-> ssh-ed25519 HY2yRg pXNTB/ailRwSEJG1pXvrzzpz5HqkDZdWVWnOH7JGeQ4 +NzA+2fxfkNRy/u+Zq96A02K1Vxy0ETYZjMkDVTKyCY8 +-> ssh-ed25519 CAWG4Q 7CLJWn+EAxoWDduXaOSrHaBFHQ4GIpYP/62FFTj3ZTI +vSYV1pQg2qI2ngCzM0nCZAnqdz1tbT4hM5m+/TyGU2c +-> ssh-ed25519 MSF3dg Akmp4NcZcDuaYHta/Vej6zulNSrAOCd5lmSV+OiBGC4 +qTxqVzTyywur+GjtUQdbaIUdH1fqCqPe6qPf8iHRa4w +--- uCKNqD1TmZZThOzlpsecBKx/k+noIWhCVMr/pzNwBr8 +r'Ƌs4˺AĥPL7` ) H-0AH5LQeH2bB޲CJG"-S\ H ssh-ed25519 HY2yRg xWRxJGWSzA5aplRYCYLB6aBwrUrQQJ2MtDYaD75V5nI -J07XF3NQiaYKKKNRcNWi9MloJD2wXHd+2K7bo6lF+QU --> ssh-ed25519 CAWG4Q jNWymbyCczcm8RcaIEbFQBlOMALsuxTl4+pLUi0aR20 -z5NixlrRD+Y7Z/aFPs6hiDW4/lp8CBQCeJYpbuG9yYM --> ssh-ed25519 MSF3dg QsUQloEKN3k1G49FQnNR/Do6ILgGpjFcw3zu5kk1Ako -IHwyFWUEWqCStNcFprnpBa8L5J6zKIsn+7HcgGRv3sM ---- oUia0fsL6opeYWACyXtHAu/Ld+bUIt/7S1VszYTvwgU -V*t2-7h&͢_!տ+(n (/}CNͷ|Nu5ù勚Kl"klOXyAe$ \ No newline at end of file +-> ssh-ed25519 HY2yRg s6iI9f25xulF4KXt+XY07kXXPKxXo7f2Ql/OTHN55Hk +WO4Fd2H9c+HL3+XhUF3BmEZVILlcchGxSrSmL2OEdGw +-> ssh-ed25519 CAWG4Q TBkdpx8k8K1NvW3wcvaF7omKFwEJ2DxWJp3tIOTjwCA +LcYgWRix23AQnw0OQ7f8+8S3J84CHUElX1vKZSETiLE +-> ssh-ed25519 MSF3dg WzrF8kjTP7BXXDjmUp7kPCKguthAW12RPo6Vy2RMmh4 +8C3mT9ktudCTANDxhyNszUkbeDG6X4wOJdx825++dYM +--- /w3YQ2UeTi67H1JR0GsdPz2KoLN2Y7BIZfFY+//AWjY +ӣ-`P@ބ)99l ZfV?I>΍w鉐 z40 2{i@ZxAHn% ʤ/WĔl}&얶(KSoz=d \ No newline at end of file diff --git a/secrets/ipmi.yml.age 
b/secrets/ipmi.yml.age index ec99e58f0e378b4bc7c7cdf61e3e3f668d1aa412..0240478155d4e35f1b1d58c328dff89f63e38fc2 100644 GIT binary patch delta 1136 zcmZ3$xqx$mPJLO4X`)k+VQzjwT0vE$PgbC-fxCIQc9prlSGtjE%p-YreK!s~?np>D@wu@UHVc zAr*cZrYT|Ok(quGX{otcUV*9M{=rTera=bv;l@#+9x0CbTs@ZxJsAYu&R95r0N11!i(; z+F4$P=?}CTOl9LXU*wNB_mm6VH*X?;TSi{FYfID1Cn|CNu6-U#OeSj{Y%h6q+qpWi zY<+{l%0m;sn{U2(tG;26{-%dcs~czBvGjJzY|7B7a27Q5KCvbyDb(gt#a;$2e$ ztrq@CQet8)xg-PVDWzaIQYo(JKC&an19|*L2q&U@>2Heo{!S$ZiU&;MGzoDJGOMPjUs<`jbXqQw)|HkFznva|J9hS?T5|vv$P0yutiG#wIiM9a$)ap|J delta 1136 zcmZ3$xqx$mPJMcYae!w|S$UbcetLPKr(?KRMW~TekiKb!VR(vDfR}!$ab!SdL}r#@ zF_(pNK&pYIafq*5idj;6SYWcFbBR-sWu8YyS)g}*Xn|!$sGo_UvqwQ#I+w1ULUD11 zZfc5=si~o*g0o||yGfuzZb)K@vxkwlWv)*|X>N&Spj%R;VMLW-QiWf>g`=slQFf%4 zNm{94x}kA7m$P$9kaJRUjz?I8n^}N~mw7;lZ(y2%x0il?wo#g8grk$Eaal-dVTxJ# z#E;_jPNBuA7E!JSQ30+QrWP(P#`-}SK0fXh;g(sxrcv(ZnNiLbk!9(I6)uiku1PLs zjzQWHLAe$F5jn0dW#;Z-7I|UjF5W&Rk;WNG2HHhQ?twvJ0bUkdy1KdwfvII-+Eo=r z<(`FRK^5L*29B17kr82)NvWO&rQv}o232A8r5@o`dHR{jTy9QlpDWyvKa|7W#{Ny` z@WJiYCej)IKKgv?ES9Lx``hJ|KNcGWpQ9m>GK8 zBFoRl_6eNGR^Z88?x$xZt*Q{CE!2B;^Q2=hgcp@%*&eytWEb&c^ESgMkqO@>%J(en zn09#Pz5xF(t&a;$6WSY|NU(flSE+mTdYv1u=Z1zyU)J5os6Q1fzV=p)ueVdO?~-r3 z>dt2IIutB8Y+#eV*7M)$s{R`qjIY>}Dr?W$KWh5kC-sHVWrtO@@WLO9*YYoa=%{3* zrd++X_`;Nys#YDW zF7IoTolE6!q+i{wxxBMpe$Vf0$N9%%=Y2gTe>0S|U)W;(;ac|xpC_LB-*3Dj&oe@M zW$2yk=GAd-+p?z@O#Q2VQ{;AK(NpvH&-drQ5agP3?f%K?^6R%gO{f)FX7g#Qs9BGE zamC96Yxy4S&@Jmr6uUQ(^R&u4fyh?dR+f~dL$#c8?>rU0PdS_Uq zB(u)D+{+s1a`JXv{JxdRO7;cs*5^+S`hKj^Z|Cl8)z(*X6WPpd`=c0Nh1zpmx;cld z+CQVe_9$<>P=^=q^x*qElAJG&BugL5D$U4{E1S&lSL%}8r?1)}OVbYTw|3Q2Ki)I- z>FN--lOpo>*d-H^7+gvwt?uqvs;Bs@KD2haRuKc^2M=@pYfsZ>yk8Vuk-X~tlNCWH zj+{BS^@HZm>do`t@jk9U#O}X$?i(4&FSiAhbN+Un)O(`lKD*T7#4(ncT|XaOaLs94 zF=y+wXz2-p&vu9YzxI3C>&H(2>n@AT@L}Bc@PP9Y(fp(DmwPOkczMT9_SfB;|Hb86 qoi{G=l<#Q#W)jQ#Nq&{%m&b>f^*97yo_Oei0n1x)af8Vf<(&Y7vib-B diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age index 
7b8cd84284e84acf2ffa496148466fc590495dc0..3e7053e43ff829c030a2e63a36e7b764bf5a5324 100644 GIT binary patch delta 422 zcmcc1e3yBGZoOksu~S}{MZS-7xO0GWa9U}UrLjv@N_e@mskUKYNU>*FesD!bh=q|S zS8_^ngmY?Oa;1BeyQf7}aE4))k8h<@W_D^$rgn0fe!69mg{f&tX;pSIm#&>cadC!j zYKoDmsiCEUvtzisNuWZxewkUeo4!k8P+3Y=u%%m&n`@YplV5IthkJg7bGTQ5U#Lf_ zp=DHQu5mG!iLpsop}Dz{hjEyXTb74Gc~HJrq+yw_dzgz;q>)L9o4;jFzJI7`ZbtUR zkK*;7X*r=0d1hhGIbOcGd8s++IYuS9A;#v35$*xu0qG@aAxQ=9i8<+hZjM~VPQHEy zF7772sX38`m97>3`GKZc!IoheNs(q*&VFu&E)`i>W)n6 zz9ojvvNX{j`4$O!I=mxGwKF+_H1Qi&Dqn2!=Yp6&0)64_tS>bVZY~ S(eF%4;o5^u*W&*C`wRfPW|@}& delta 422 zcmcc1e3yBGZheJ;v1z5NU$Bd(w!W8hs8_0SrnX;JM7d)`NQhZxUY@&Uif^igf1a@g zmw%;)r;(pQh=H4NYKmh>Wx9K0xlgWls;jSmW=dJGc3NsiQD#wTky)54m#&>cadC!j zYKoDmsiCEUvtzisNuWZAi+{48bC$k^c}AsoRj9s2m8-K+RBmyipGQTMe_~LOcBxN5 zPK8T=c1SXpVSZF#QD#t*b9RMehDBaxWtOpVhDC&LP`G|#c$%e|xk<9SMWl&~xs&U} zkK*-4u921A1?9=5{z0K3q3Iq48384QVZj-O{`w^mRVI$+E@>WtzLBL#Ar@Q~rIG0Y z!REnc+Fp4Ek(L?Sk*)z|`KG=GnQjFJX<;ErNg3H5z8Oj7$)Q}jy1EJxMm~iWW$s2^ z9$xM#Imsbe8LpMN`6d;9o)yN9;hD~kRhD6;WqFoP&dHHnZ2$Qt`FU#Ft(?rTR@3V6 zJoa^eoeW);tTQbBaBX(L!UfX}{B&J}=PzK}r+Hs$!M)ut+3RJeoNL|L<^Iy6OJTba SM@tLiiQ7jeelbtnBn1EqW0Eoe diff --git a/secrets/munge-key.age b/secrets/munge-key.age index ce50c123cb06c52273d098937b9cb0072650ee4f..372d152b06ca8631f5451961dcf19d14fba0aa44 100644 GIT binary patch delta 1816 zcmcb{|AKFVPJK~nSh!_mm`_AzaAjDauUDu~P-<>^RhVy3P=QlOSekQws&{x$X;QX9 zK9^s)c|b;pOKPx5QC^6#p=VTfRzPUEet4osxvOu6uX%_`iiK}@j-h{YGMBEMLUD11 zZfc5=si~o*LUFodVy>ftk#nSBNM*50slTCtbErvLqFY5~VwHJTc%gS%NOD1nS&o;F zOK@h9mtl4~SFoe2WvY*Np?Ru%T0~K@e|V{OQbBQLaj3SVsY{h>X<<-6RY_!?bEHS{ z#E;_PC6+}lp_V2E!9gAtzUd{-A!gaB5h+zAE)l`zrY7EHQO2c(c}~gZA(h!&p)N+r zVJ5~Ix!!q!!DafnX2JfBm7#%|j>$n~DcZSd<>6sYnV|;RMJ}F`;~B-nE&R$#Jwo!e z^ZY}MovT7p9Yc$qD?D>tyd%>*9mBj`O3ZWf5}hJl%t8XWvckV1vk%jIw0Eur#OASTgWZf%(iKv@yvieU{enu}Ju4#I)0`dM^}W;5vqH?0wF}C$i>tIN z>%B|U%stF1!d$sLlifq30{l~|yhB5ay?sm5jY7?H^aHEROr7(*(_Jz>i?nkRQ!>J{ zjf=T-b#)b-48j95+%qim0s?}=%nZ^zObT)>68$|Qqx>u@3=DIsDtszZ{Bp8f3W~X& 
z{<-t1Gvw=0<9?$xZx+4_I`RC6){mSDw&?mVsmDDx^5joCdUi^IibTu?N3Pj3)i{d- zmCxtZZ)M+*Gof;o#H5fjHfxf)E7C1?h!l8<?ht~Ffcd1p%ct0Nyig--kw7xZ*lW3I=mgV$v`RyURQ8?NN` z6;)+^THm`hjp3iWl(KX3wv(RW<&GOw^0%v8OAvn{75jf(eYUKTsp{63vH92gIjxqS z@{VzkQr8#ns7PiL3yhN&{JgsDo%NrS6*oWleqU#z6x6YH!;w1{zBzkW&AG%_ZDwQq z(tC1PSjr~#JI3z%i~c{BHg_&XQ1T!hoi`&yN-ojc)Pxz+8xd+8?;Lz-X>FD;#~czExR^lY>J3|Y~Fk$aL%^tfdxD#qjnu; z)hYgGxa(J0^mI?pNAJ71 zjQ%fKas19txvy3SAC*4QGgslCDl=)zx4GRDPfajC)%kDMzLzZ1H{QFy_Q?FgpLxfx znA8UeNmv~VWeS`BO5$(JKJg!oXP1fI_CE2&?Vjer>8z7J<^6igvFYfKpnAy}c6(MA zKELnlWfb_h>rEt=?`_>Dr_F3`zp(swYpSHSnfG3^i&L!TD&G%xSZZ_q%0ur78q^S^iHOIhySL((m3EArl>=^>5 zxaoyX-t|5+F#AaCDn+l#&iO{?_hu;7uYDAnZhhuO+Oe65i>C2L#RuQLVg7Q}($j^3 z88Q844Fy?ICDRXoO}A;g9Jh0S-2QFfQg*Wl9)5l^{Rsc+NzGkPc)qni`>DYen?Pd z{$F|9aFgJ>GzN32Bsu+$&y9rj6@*r1J71R-Kj~2!;TJY<`wORt&2~e1Xh^3N2GU5s4&^${3GQb%l{r1%N>t)PPuy4cx}fco#r^>2WKaFUih`@b)Jo( zFzY+pwBsv8{Z&}RryA$4`?hY~juidhDz9f8(S7>(#r|ps(S&|RBc0<@miVt;FIubh ieO-@$%!6Niua5?;s^Hh(@%6@w8g2F5!;AQmmAe4~EDP}f delta 1926 zcmaFCca497PQ9x~M4nSwM1)U-S*d=mg>Q0DMNnQe?UTS(-l)h_XVxV(+Ub?Aextp7jZ&9ZmS=Iemy<_kMOt27il3vSkBfh1MPOq7 z#E;_P!LHe9o}RgxktW^+X~u~@hEW;Lj^UNs+Kwd_PF4N}W)&vsu7*x-l{wj5=E;#> zDG}i%RhebsL74%*720LSKH(X@-hs}p{$2%XMH%kluI^z@QTdUR;~B-n104%oe0)>W z&D?WJ9i3cCaxIFzJS+TClMTErLxMt#jWb=0U46p6^D{lUObc>zi~R!)JqwG%%ZiEv ziZcwe{1Zb>ydy#j0>WGk%9Dc3JuAHn@>~rjpJf!UcQW%2D$4W;42(*O2r7=$4@)*o zD|B)8G)~E|Fpf$~ad-1{FGM zvJl6N%&bDcsM74HfN=9*PcGlG^ziJm0E1lP6yu7hic*UzQ?Jb6s-n#N;Ph1g%z`A} z2=9;x%gRU>S9I%~9mCyC0u@qS49YXJD?Cio3tT+2!$VB6wDa^+48678Bhr1%GK!r- z>%+sGN(-}#!d4t4R%E%5em4f81S zugK>LHO$B}2=J^gj!bna!WQS%P?}R3^(%R($&>f zNXqpnG;lMpH1^X^_DxT7O^zt43et8=&Cd@s3Qlq?u}DnucP|Yri^xyrGUMOdYu8;W zcJ6;&n!$o4v#-d`)9zhsaCGUJ%a$jZ9fJNw&zdqjq4rnAwfcqXFHEd{8^uqWD9u;I z`oemaU>bmEaC&s_$aOzEcE7ONj@_z() zg#QL+%yBvGn6+<@W1mUCRjuQOg~v7Juf6*ED6G?P&lKAphl?9@nLfnst>pRWa*=V~ z;-joT-G8i7Ex361w5-Tifg|E&bvp~f9-h#dclyIiA?q#s#6OB3-SBhK@d*m`yH2tl z5J;@rk~HQ2-B+h?YxBpyc(;b{Re(Xmvpuss=Er_GH!D1N(aJQ%=$2!DqQBqWJ*Ab4 
zMKj$jWXiS6QBvnhz8&g45+qR&)Mpg5^j7NQO0}v~f$raPdP^C&tG4og$emN^dUeJD z9>sNM`KG!lUNCRvXXQQacU@;A%MU}oH7xZ~>8DGZE$<1)UjG}vA*!V*2)o59ew(sTE)oTiJIxWsgPARo!Y%tRMbS|X9=oAyDtxR#yLQSKw5(^%S*xjB8?7zaxmoF=i_GGGO9H+zsXY!#`IKJInT5uiN*)qkhdDe>~kq#`PR$Bv41w<~Ie%)z<-j-G^1;bTbavH3QCj8}o zbNFS0_GRDYLN$}3IpY1F-71{DVMEHjbla_O{3S2ew$aHA?*J3QvJ$l=bzo{Ph5LOa_X!t(X&6ViWNBEyn5EunVV;BWKAypdtmv# z4`*&&D=AD)T$yu2n1#n>&vJ_szn49_aBD?Cf^!Pr>7R#f<5r#d@UT(msH<#J$@#mC zd4Wkh?rL9~CqInV3Vig|2m=VmF}GbSC6jHkruEv{ae^n+I3DeK*slW($=%< aFHSkdqO?m-h;!#8|L${N_m`~YHwFMy!YFG1 diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age index ecd0593eb5b92071f8dedb20f3aa0d1ba2646b05..a498056fcc84bf41240facabae6993a82932b2a8 100644 GIT binary patch delta 481 zcmbQnGL2<|PJLuph<1Kfnz^w@Sz2+Dmwv8(iJ3=uXogRoWks1^N=BriYe-08ieZ6K zAeWnenrTY0lVw;+ns!AN0FafenDAOxoK#*e}s0JK|rvDi?e6( z#E;_j!NJ8Y79~|hSy?7Ip?+?Dxm8&fkx53G5rv_~=BCL>DVeDrZvMso&K@RQ`W41* z`MyC#B_?L3e%|gOxvAlvrRJtCrCDLw$=*hm>B$EACP@(?X`Yc>y1Kdw`sMn0l_^D` z;St7Zexa^~Wfn%|o|Z07PKilDkTu*p=7F>B?e3VsxgJok) zLeT^}?#{VOjdXL=V!ql(?BD3Mruc;>_r3SAOskF7dhC@td3pw4$#SvK%$p3-a+3tI zzQ0adG4JTiwTX+unRCDXJhrL++Hy}F-kDKh_BxL*&yA?$UsA>2zhL6#iM)&cs44r2 bG(@@f=?ECJ#62-qoh!zgEc>bF-xdP^YoD%o delta 481 zcmbQnGL2<|PJK#xh(WSZvYVf)cW_0hpOIOTNqAX=pND0BiAPpWR9Ts?en6o`evxxf zK37(Xqo=wpn&oNQr-_vvWm& zVPdIsQGlTC{%J{G&Z*|X=6Q}09-;1u*_LH4If2DoW~IJq zkw(RtX+DvTDM^t*QEmZ=gR0NhOY4y1KdwP9Y&x`Ie>5 z;il;+c}7{jMa2;Y8NQj3$&MwZWmP#jQTiVB1={HbPDR;~T*vO8oO3?3wDL|6vzp*7 z0masdm$y8c>@2(O%*isb%h4gi`4_qC6V%GZ_{^^zf4cPZhU*1oz5*AHUfF!~uhNZa zF~Y4suDP~m95|5I@vih|>Hpe4uPtV?%ddWIdxN1PJ#mwti0qvlzMpfwCQn~@-v7># d3ku6#Jkt`$Eua1O?<@AFd)bd|^m@H$5&$o9x?}(V -- 2.49.0 From 008584b465e85f85c64f77c55868c9e4646c2b18 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 13:41:36 +0200 Subject: [PATCH 313/472] Disable home via NFS in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It won't be accesible anymore as we won't be in the same LAN. 
Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index e33afab..24a3d2c 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -2,7 +2,9 @@ { imports = [ - ../common/xeon.nix + ../common/base.nix + ../common/xeon/console.nix + ../common/xeon/net.nix ../module/emulation.nix ]; -- 2.49.0 From 6c1afa3fd8619fd92cb08dfd9191ca613b279bff Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 26 May 2025 14:17:06 +0200 Subject: [PATCH 314/472] Update configuration for UPC network MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fox machine will be placed in the UPC network, so we update the configuration with the new IP and gateway. We won't be able to reach hut directly so we also remove the host entry and proxy. Reviewed-by: Aleix Boné --- m/common/xeon/net.nix | 4 ---- m/fox/configuration.nix | 28 +++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/m/common/xeon/net.nix b/m/common/xeon/net.nix index 09e83ed..dfd85f8 100644 --- a/m/common/xeon/net.nix +++ b/m/common/xeon/net.nix @@ -85,10 +85,6 @@ 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 eudy-ib xeon08-ib0 10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi - - # fox - 10.0.40.26 fox - 10.0.40.126 fox-ipmi ''; }; } diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 24a3d2c..29d8fe9 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -4,7 +4,6 @@ imports = [ ../common/base.nix ../common/xeon/console.nix - ../common/xeon/net.nix ../module/emulation.nix ]; @@ -21,11 +20,30 @@ hardware.cpu.intel.updateMicrocode = lib.mkForce false; networking = { + defaultGateway = "147.83.30.130"; + nameservers = [ "8.8.8.8" ]; hostName = "fox"; - interfaces.enp1s0f0np0.ipv4.addresses = [ { - address = "10.0.40.26"; - prefixLength = 24; - } ]; + interfaces.enp1s0f0np0.ipv4.addresses 
= [ + { + # UPC network + # Public IP configuration: + # - Hostname: fox.ac.upc.edu + # - IP: 147.83.30.141 + # - Gateway: 147.83.30.130 + # - NetMask: 255.255.255.192 + # Private IP configuration for BMC: + # - Hostname: fox-ipmi.ac.upc.edu + # - IP: 147.83.35.27 + # - Gateway: 147.83.35.2 + # - NetMask: 255.255.255.0 + address = "147.83.30.141"; + prefixLength = 26; # 255.255.255.192 + } + ]; + extraHosts = '' + 147.83.30.141 fox.ac.upc.edu + 147.83.35.27 fox-ipmi.ac.upc.edu + ''; }; # Configure Nvidia driver to use with CUDA -- 2.49.0 From f8fc391cae697b2a89523a2498ff3d51387ea742 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 28 May 2025 13:03:01 +0200 Subject: [PATCH 315/472] Monitor fox, gateway and UPC anella via ICMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fox should reply once the machine is connected to the UPC network. Monitoring also the gateway and UPC anella allows us to estimate if the whole network is down or just fox. Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 7042c91..dd3893a 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -169,6 +169,9 @@ "8.8.8.8" "ssfhead" "anella-bsc.cesca.cat" + "upc-anella.cesca.cat" + "fox.ac.upc.edu" + "arenys5.ac.upc.edu" ]; }]; relabel_configs = [ -- 2.49.0 From e8ac6cf0f3604e0a711fe9713ded90ea8bbbabf7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 2 Jun 2025 07:55:11 +0200 Subject: [PATCH 316/472] Remove fox monitoring via IPMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will need to set up a VPN to be able to access fox in its new location, so for now we simply remove the IPMI monitoring.
Reviewed-by: Aleix Boné --- m/hut/monitoring.nix | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index dd3893a..db5f49f 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -267,17 +267,6 @@ } ]; } - { - job_name = "ipmi-fox"; - metrics_path = "/ipmi"; - static_configs = [ - { targets = [ "127.0.0.1:9290" ]; } - ]; - params = { - target = [ "fox-ipmi" ]; - module = [ "fox" ]; - }; - } ]; }; } -- 2.49.0 From 557618d43f3b77ae147a0b219cc2e6aaa4fa2007 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 2 Jun 2025 11:12:30 +0200 Subject: [PATCH 317/472] Add machine map file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents the location, board and serial numbers so we can track the machines if they move around. Some information is unknown. Using the Nix language to encode the machines' location and properties allows us to later use that information in the configuration of the machines themselves.
Reviewed-by: Aleix Boné --- m/map.nix | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 m/map.nix diff --git a/m/map.nix b/m/map.nix new file mode 100644 index 0000000..6c31060 --- /dev/null +++ b/m/map.nix @@ -0,0 +1,69 @@ +{ + # In physical order from top to bottom (see note below) + ssf = { + # Switches for Ethernet and OmniPath + switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; }; + switch-opa = { pos=41; size=1; }; + + # SSF login + ssfhead = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="operations@bsc.es"; }; + + # Storage + bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; }; + lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; }; + lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; }; + + # Compute xeon + owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; }; + owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; }; + xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; }; + xeon04 = { pos=32; size=1; label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; }; + koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; }; + xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; }; + hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; }; + eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; }; + + # 16 KNL nodes, 4 per chassis + 
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; }; + knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; }; + knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; }; + knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; }; + + # Slot 19 empty + + # EPI (hw team, guessed order) + epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; }; + epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; }; + epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; }; + anon = { pos=14; size=2; }; # Unlabeled machine. Operative + + # These are old and decommissioned (off) + power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; }; + powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; }; + gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; }; + odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; }; + amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic + moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; }; + }; + + bsc2218 = { + raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; }; + }; + + upc = { + fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; }; + }; + + # NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the + # bottom. Example: + # + # | ... | - [pos+size] <--- Label in chassis + # +--------+ + # | node | - [pos+1] + # | 2U | - [pos] + # +------- + + # | ... | - [pos-1] + # + # NOTE: The board and sn refers to the FRU information (Board Product and + # Board Serial) via `ipmitool fru print 0`. +} -- 2.49.0 From 2b08fcd21a6aea9d18c673fde0ddad1e3ac81c8a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 10 Sep 2024 15:03:03 +0200 Subject: [PATCH 318/472] Only proxy SSH git remotes via hut in xeon Other machines like raccoon have direct access. 
Reviewed-by: Aleix Roca Nonell --- m/common/base/ssh.nix | 4 ---- m/common/xeon.nix | 3 ++- m/common/xeon/ssh.nix | 8 ++++++++ 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 m/common/xeon/ssh.nix diff --git a/m/common/base/ssh.nix b/m/common/base/ssh.nix index b531076..92c2c70 100644 --- a/m/common/base/ssh.nix +++ b/m/common/base/ssh.nix @@ -13,10 +13,6 @@ in Host bscpm02.bsc.es bscpm03.bsc.es bscpm04.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es User git ProxyCommand nc -X connect -x hut:23080 %h %p - - # Connect to BSC machines via hut proxy too - Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es - ProxyCommand nc -X connect -x hut:23080 %h %p ''; programs.ssh.knownHosts = hostsKeys // { diff --git a/m/common/xeon.nix b/m/common/xeon.nix index 30cf73c..146e018 100644 --- a/m/common/xeon.nix +++ b/m/common/xeon.nix @@ -2,8 +2,9 @@ # Provides the base system for a xeon node. imports = [ ./base.nix - ./xeon/fs.nix ./xeon/console.nix + ./xeon/fs.nix ./xeon/net.nix + ./xeon/ssh.nix ]; } diff --git a/m/common/xeon/ssh.nix b/m/common/xeon/ssh.nix new file mode 100644 index 0000000..86978f9 --- /dev/null +++ b/m/common/xeon/ssh.nix @@ -0,0 +1,8 @@ +{ + # Connect to intranet git hosts via proxy + programs.ssh.extraConfig = '' + # Connect to BSC machines via hut proxy too + Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es + ProxyCommand nc -X connect -x hut:23080 %h %p + ''; +} -- 2.49.0 From 43dc33663864202112a94e3fd51c9c3f0618a313 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 14 Oct 2024 19:12:25 +0200 Subject: [PATCH 319/472] Enable linger for user rarias Allows services to run without a login session. 
Reviewed-by: Aleix Roca Nonell --- m/common/base/users.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 229be8c..c84c53c 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -20,6 +20,7 @@ rarias = { uid = 1880; isNormalUser = true; + linger = true; home = "/home/Computational/rarias"; description = "Rodrigo Arias"; group = "Computational"; -- 2.49.0 From 8fedc5518e949229adfe6deb2ff1799541d0298f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 18 Feb 2025 16:19:04 +0100 Subject: [PATCH 320/472] Allow X11 forwarding via SSH Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index dd15a82..7ceb424 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -37,6 +37,8 @@ nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; + services.openssh.settings.X11Forwarding = true; + users.motd = '' ⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀ -- 2.49.0 From 4d03842f7c8c9da10017762d3508cc1d310fdebc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 25 Feb 2025 17:11:09 +0100 Subject: [PATCH 321/472] Add node exporter monitoring in raccoon Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 7ceb424..9585aef 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -39,6 +39,13 @@ services.openssh.settings.X11Forwarding = true; + services.prometheus.exporters.node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; + users.motd = '' ⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀ -- 2.49.0 From 62ec4e014a86cae00d84ba7ad7108cec50ea51ce Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 3 Mar 2025 13:55:23 +0100 Subject: [PATCH 322/472] Add dbautist user to raccoon machine Reviewed-by: Aleix Roca Nonell --- m/common/base/users.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index c84c53c..80aeda0 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -108,7 +108,7 @@ home = "/home/Computational/dbautist"; description = "Dylan Bautista Cases"; group = "Computational"; - hosts = [ "hut" ]; + hosts = [ "hut" "raccoon" ]; hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" -- 2.49.0 From 9dc67d402fea8489362ef3572e48393a3439e4af Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 18 Mar 2025 16:48:47 +0100 Subject: [PATCH 323/472] Disable nix garbage collector in raccoon Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 9585aef..1a38442 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -37,6 +37,9 @@ nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; + # Disable garbage collection for now + nix.gc.automatic = lib.mkForce false; + services.openssh.settings.X11Forwarding = true; services.prometheus.exporters.node = { -- 2.49.0 From e51ef52721106c8f2fd9177d52bef3ca2050c372 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 21 Mar 2025 17:51:41 +0100 Subject: [PATCH 324/472] Enable binfmt emulation in raccoon Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 1 + 
1 file changed, 1 insertion(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 1a38442..28fa92d 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/base.nix + ../module/emulation.nix ]; # Don't install Grub on the disk yet -- 2.49.0 From eec3e27d660e2e76b39a3b9e3ea2b23579acaaef Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 5 May 2025 10:50:43 +0200 Subject: [PATCH 325/472] Make raccoon use performance governor Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 28fa92d..0e11ed4 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -31,6 +31,9 @@ trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; }; + # Enable performance governor + powerManagement.cpuFreqGovernor = "performance"; + # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.graphics.enable = true; -- 2.49.0 From 2bca10b0e42ea267831c8731ce3f309a861f7b31 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Tue, 6 May 2025 14:39:48 +0200 Subject: [PATCH 326/472] Enable nixdebuginfod in raccoon Reviewed-by: Aleix Roca Nonell --- m/raccoon/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 0e11ed4..18c79fb 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/base.nix ../module/emulation.nix + ../module/debuginfod.nix ]; # Don't install Grub on the disk yet -- 2.49.0 From 8b43a6ffb6bf31be57ce08453608dce448339c3f Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 15 May 2025 12:21:26 +0200 Subject: [PATCH 327/472] Extend perf support in raccoon Reviewed-by: Aleix Roca Nonell --- 
m/raccoon/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 18c79fb..2482a16 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -5,6 +5,7 @@ ../common/base.nix ../module/emulation.nix ../module/debuginfod.nix + ../eudy/kernel/perf.nix ]; # Don't install Grub on the disk yet -- 2.49.0 From f0c4206ab81019da4d18e11d24535bb7b0ed7482 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 15 May 2025 12:24:49 +0200 Subject: [PATCH 328/472] Create tracing group and add arocanon in raccoon Reviewed-by: Aleix Roca Nonell --- m/common/base/users.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 80aeda0..8e2ca80 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -40,7 +40,7 @@ home = "/home/Computational/arocanon"; description = "Aleix Roca"; group = "Computational"; - extraGroups = [ "wheel" ]; + extraGroups = [ "wheel" "tracing" ]; hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" @@ -144,6 +144,7 @@ groups = { Computational = { gid = 564; }; + tracing = { }; }; }; } -- 2.49.0 From 83efd6c876f68739cd94013e559cf941aebe816c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 6 Jun 2025 14:44:47 +0200 Subject: [PATCH 329/472] Use UPC time servers as others are blocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 29d8fe9..b87f85c 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -22,6 +22,7 @@ networking = { defaultGateway = "147.83.30.130"; 
nameservers = [ "8.8.8.8" ]; + timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; interfaces.enp1s0f0np0.ipv4.addresses = [ { -- 2.49.0 From e40fd24f2627748be2390536ea8397348de88d17 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 6 Jun 2025 15:11:12 +0200 Subject: [PATCH 330/472] Use DHCP for Ethernet in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index b87f85c..79688cd 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -20,31 +20,20 @@ hardware.cpu.intel.updateMicrocode = lib.mkForce false; networking = { - defaultGateway = "147.83.30.130"; - nameservers = [ "8.8.8.8" ]; timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; - interfaces.enp1s0f0np0.ipv4.addresses = [ - { - # UPC network - # Public IP configuration: - # - Hostname: fox.ac.upc.edu - # - IP: 147.83.30.141 - # - Gateway: 147.83.30.130 - # - NetMask: 255.255.255.192 - # Private IP configuration for BMC: - # - Hostname: fox-ipmi.ac.upc.edu - # - IP: 147.83.35.27 - # - Gateway: 147.83.35.2 - # - NetMask: 255.255.255.0 - address = "147.83.30.141"; - prefixLength = 26; # 255.255.255.192 - } - ]; - extraHosts = '' - 147.83.30.141 fox.ac.upc.edu - 147.83.35.27 fox-ipmi.ac.upc.edu - ''; + # UPC network (may change over time, use DHCP) + # Public IP configuration: + # - Hostname: fox.ac.upc.edu + # - IP: 147.83.30.141 + # - Gateway: 147.83.30.130 + # - NetMask: 255.255.255.192 + # Private IP configuration for BMC: + # - Hostname: fox-ipmi.ac.upc.edu + # - IP: 147.83.35.27 + # - Gateway: 147.83.35.2 + # - NetMask: 255.255.255.0 + interfaces.enp1s0f0np0.useDHCP = true; }; # Configure Nvidia driver to use with CUDA -- 2.49.0 From d00f996f595748e3e520b1d72e222d21b84529ed Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Tue, 3 Jun 2025 17:59:17 +0200 Subject: [PATCH 331/472] Use extra- for substituters and trusted-public-keys From the nix manual: > A configuration setting usually overrides any previous value. However, > for settings that take a list of items, you can prefix the name of the > setting by extra- to append to the previous value. Reviewed-by: Rodrigo Arias Mallo --- m/module/hut-substituter.nix | 4 ++-- m/raccoon/configuration.nix | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/m/module/hut-substituter.nix b/m/module/hut-substituter.nix index 9b871f6..8d27a45 100644 --- a/m/module/hut-substituter.nix +++ b/m/module/hut-substituter.nix @@ -4,7 +4,7 @@ # Don't add hut as a cache to itself assert config.networking.hostName != "hut"; { - substituters = [ "http://hut/cache" ]; - trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + extra-substituters = [ "http://hut/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; }; } diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 2482a16..28d4da8 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -29,8 +29,8 @@ }; nix.settings = { - substituters = [ "https://jungle.bsc.es/cache" ]; - trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + extra-substituters = [ "https://jungle.bsc.es/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; }; # Enable performance governor -- 2.49.0 From 8f80ed2ccec432951fe674c9d5b22fcd8967eecc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 10 Jun 2025 18:23:20 +0200 Subject: [PATCH 332/472] Add hut as nix cache in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 6 ++++++ 1 file changed, 6 insertions(+) diff 
--git a/m/fox/configuration.nix b/m/fox/configuration.nix index 79688cd..f2087e2 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -36,6 +36,12 @@ interfaces.enp1s0f0np0.useDHCP = true; }; + # Use hut for cache + nix.settings = { + extra-substituters = [ "https://jungle.bsc.es/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + }; + # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.graphics.enable = true; -- 2.49.0 From 7d3c7342aeb4ec202e6924d5d7d67aa835d8d90d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 10 Jun 2025 14:37:39 +0200 Subject: [PATCH 333/472] Use performance governor in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index f2087e2..ea793e7 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -19,6 +19,9 @@ hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; hardware.cpu.intel.updateMicrocode = lib.mkForce false; + # Use performance for benchmarks + powerManagement.cpuFreqGovernor = "performance"; + networking = { timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; -- 2.49.0 From e9e3704b677baed1649583f25e4e1bc050a9534e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 10 Jun 2025 14:38:02 +0200 Subject: [PATCH 334/472] Only allow Vincent to access fox for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needed to run benchmarks without interference. 
Reviewed-by: Aleix Boné --- m/common/base/users.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 8e2ca80..9f20b00 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -69,7 +69,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "hut" "raccoon" "fox" ]; + hosts = [ "hut" "raccoon" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -82,7 +82,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ]; + hosts = [ "owl1" "owl2" "hut" "raccoon" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" @@ -121,7 +121,7 @@ home = "/home/Computational/dalvare1"; description = "David Álvarez"; group = "Computational"; - hosts = [ "hut" "fox" ]; + hosts = [ "hut" ]; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" -- 2.49.0 From e13288fc29ca5eed235c205511931a0fd631e54c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 2 Jun 2025 12:22:41 +0200 Subject: [PATCH 335/472] Create specific SSF rack configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow xeon machines to optionally inherit SSF configuration such as the NFS mount point and the network 
configuration. Reviewed-by: Aleix Boné --- m/bay/configuration.nix | 2 +- m/common/ssf.nix | 9 +++++++++ m/common/{xeon => ssf}/fs.nix | 0 m/common/{xeon => ssf}/net.nix | 0 m/common/{xeon => ssf}/ssh.nix | 0 m/common/xeon.nix | 5 +---- m/eudy/configuration.nix | 2 +- m/hut/configuration.nix | 2 +- m/koro/configuration.nix | 2 +- m/lake2/configuration.nix | 2 +- m/owl1/configuration.nix | 2 +- m/owl2/configuration.nix | 2 +- 12 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 m/common/ssf.nix rename m/common/{xeon => ssf}/fs.nix (100%) rename m/common/{xeon => ssf}/net.nix (100%) rename m/common/{xeon => ssf}/ssh.nix (100%) diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 5f215cf..5c6f93c 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix ../module/monitoring.nix ]; diff --git a/m/common/ssf.nix b/m/common/ssf.nix new file mode 100644 index 0000000..a01839b --- /dev/null +++ b/m/common/ssf.nix @@ -0,0 +1,9 @@ +{ + # Provides the base system for a xeon node in the SSF rack. + imports = [ + ./xeon.nix + ./ssf/fs.nix + ./ssf/net.nix + ./ssf/ssh.nix + ]; +} diff --git a/m/common/xeon/fs.nix b/m/common/ssf/fs.nix similarity index 100% rename from m/common/xeon/fs.nix rename to m/common/ssf/fs.nix diff --git a/m/common/xeon/net.nix b/m/common/ssf/net.nix similarity index 100% rename from m/common/xeon/net.nix rename to m/common/ssf/net.nix diff --git a/m/common/xeon/ssh.nix b/m/common/ssf/ssh.nix similarity index 100% rename from m/common/xeon/ssh.nix rename to m/common/ssf/ssh.nix diff --git a/m/common/xeon.nix b/m/common/xeon.nix index 146e018..1394660 100644 --- a/m/common/xeon.nix +++ b/m/common/xeon.nix @@ -1,10 +1,7 @@ { - # Provides the base system for a xeon node. + # Provides the base system for a xeon node, not necessarily in the SSF rack. 
imports = [ ./base.nix ./xeon/console.nix - ./xeon/fs.nix - ./xeon/net.nix - ./xeon/ssh.nix ]; } diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix index 29d495a..d676135 100644 --- a/m/eudy/configuration.nix +++ b/m/eudy/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix #(modulesPath + "/installer/netboot/netboot-minimal.nix") ./kernel/kernel.nix diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index b00351d..493208c 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix ../module/ceph.nix ../module/debuginfod.nix diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix index 4214fee..a106b62 100644 --- a/m/koro/configuration.nix +++ b/m/koro/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix #(modulesPath + "/installer/netboot/netboot-minimal.nix") ../eudy/cpufreq.nix diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index 2a29ae7..e2f350d 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix ../module/monitoring.nix ]; diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index 20e3cf5..e471969 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix ../module/ceph.nix ../module/emulation.nix ../module/slurm-client.nix diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 54849f6..e28c5e5 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -2,7 +2,7 @@ { imports = [ - ../common/xeon.nix + ../common/ssf.nix ../module/ceph.nix ../module/emulation.nix ../module/slurm-client.nix -- 2.49.0 From c3c3614f6338875c82a101487ad823b0dc152b64 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 2 Jun 2025 09:07:32 +0200 Subject: [PATCH 
336/472] Add tent machine from xeon04 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We moved the tent machine to the server room in the BSC building and it is now directly connected to raccoon via NAT. Fixes: https://jungle.bsc.es/git/rarias/jungle/issues/106 Reviewed-by: Aleix Boné --- flake.nix | 1 + m/map.nix | 3 ++- m/raccoon/configuration.nix | 12 ++++++++++++ m/tent/configuration.nix | 38 +++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 m/tent/configuration.nix diff --git a/flake.nix b/flake.nix index 037bba2..67ae087 100644 --- a/flake.nix +++ b/flake.nix @@ -18,6 +18,7 @@ in { nixosConfigurations = { hut = mkConf "hut"; + tent = mkConf "tent"; owl1 = mkConf "owl1"; owl2 = mkConf "owl2"; eudy = mkConf "eudy"; diff --git a/m/map.nix b/m/map.nix index 6c31060..606d417 100644 --- a/m/map.nix +++ b/m/map.nix @@ -17,7 +17,7 @@ owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; }; owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; }; xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; }; - xeon04 = { pos=32; size=1; label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; }; + # Slot 34 empty koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; }; xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; }; hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; }; @@ -48,6 +48,7 @@ bsc2218 = { raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; }; + tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; 
contact="rodrigo.arias@bsc.es"; }; }; upc = { diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 28d4da8..3e70f4b 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -26,6 +26,18 @@ address = "84.88.51.152"; prefixLength = 25; } ]; + interfaces.enp5s0f1.ipv4.addresses = [ { + address = "10.0.44.1"; + prefixLength = 24; + } ]; + nat = { + enable = true; + internalInterfaces = [ "enp5s0f1" ]; + externalInterface = "eno0"; + }; + hosts = { + "10.0.44.4" = [ "tent" ]; + }; }; nix.settings = { diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix new file mode 100644 index 0000000..628ae39 --- /dev/null +++ b/m/tent/configuration.nix @@ -0,0 +1,38 @@ +{ config, pkgs, ... }: + +{ + imports = [ + ../common/xeon.nix + ../module/emulation.nix + ../module/debuginfod.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675"; + + networking = { + hostName = "tent"; + interfaces.eno1.ipv4.addresses = [ + { + address = "10.0.44.4"; + prefixLength = 24; + } + ]; + + # Only BSC DNSs seem to be reachable from the office VLAN + nameservers = [ "84.88.52.35" "84.88.52.36" ]; + defaultGateway = "10.0.44.1"; + }; + + nix.settings = { + extra-substituters = [ "https://jungle.bsc.es/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + }; + + services.prometheus.exporters.node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; +} -- 2.49.0 From 7176b066bbeaf87b1dfe853f2fa71ecad1751d4c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Jun 2025 12:52:10 +0200 Subject: [PATCH 337/472] Don't use proxy in base preset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/ssh.nix | 7 ------- 1 file changed, 7 deletions(-) diff --git a/m/common/base/ssh.nix 
b/m/common/base/ssh.nix index 92c2c70..53de423 100644 --- a/m/common/base/ssh.nix +++ b/m/common/base/ssh.nix @@ -8,13 +8,6 @@ in # Enable the OpenSSH daemon. services.openssh.enable = true; - # Connect to intranet git hosts via proxy - programs.ssh.extraConfig = '' - Host bscpm02.bsc.es bscpm03.bsc.es bscpm04.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es - User git - ProxyCommand nc -X connect -x hut:23080 %h %p - ''; - programs.ssh.knownHosts = hostsKeys // { "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; -- 2.49.0 From 028b151c78dfdc54916c8958f2b73ef390d393d6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Jun 2025 17:17:29 +0200 Subject: [PATCH 338/472] Add hut SSH configuration from outside SSF LAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/ssh-hut-extern.nix | 9 +++++++++ m/raccoon/configuration.nix | 1 + m/tent/configuration.nix | 1 + 3 files changed, 11 insertions(+) create mode 100644 m/module/ssh-hut-extern.nix diff --git a/m/module/ssh-hut-extern.nix b/m/module/ssh-hut-extern.nix new file mode 100644 index 0000000..5eeafb3 --- /dev/null +++ b/m/module/ssh-hut-extern.nix @@ -0,0 +1,9 @@ +{ + programs.ssh.extraConfig = '' + Host ssfhead + HostName ssflogin.bsc.es + Host hut + ProxyJump ssfhead + HostName xeon07 + ''; +} diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 3e70f4b..5f75f99 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -5,6 +5,7 @@ ../common/base.nix ../module/emulation.nix ../module/debuginfod.nix + ../module/ssh-hut-extern.nix ../eudy/kernel/perf.nix ]; diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 628ae39..1365a0d 100644 --- a/m/tent/configuration.nix +++ 
b/m/tent/configuration.nix @@ -5,6 +5,7 @@ ../common/xeon.nix ../module/emulation.nix ../module/debuginfod.nix + ../module/ssh-hut-extern.nix ]; # Select the this using the ID to avoid mismatches -- 2.49.0 From 18461c0d596c624dc72ea720d37546ee51bd117b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Jun 2025 17:24:40 +0200 Subject: [PATCH 339/472] Add access to tent to all hut users too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 9f20b00..7e9817a 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -56,7 +56,7 @@ home = "/home/Computational/rpenacob"; description = "Raúl Peñacoba"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" ]; + hosts = [ "owl1" "owl2" "hut" "tent" ]; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" @@ -69,7 +69,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "hut" "raccoon" ]; + hosts = [ "hut" "tent" "raccoon" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -82,7 +82,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "raccoon" ]; + hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; 
openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" @@ -108,7 +108,7 @@ home = "/home/Computational/dbautist"; description = "Dylan Bautista Cases"; group = "Computational"; - hosts = [ "hut" "raccoon" ]; + hosts = [ "hut" "tent" "raccoon" ]; hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" @@ -121,7 +121,7 @@ home = "/home/Computational/dalvare1"; description = "David Álvarez"; group = "Computational"; - hosts = [ "hut" ]; + hosts = [ "hut" "tent" ]; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" @@ -134,7 +134,7 @@ home = "/home/Computational/varcila"; description = "Vincent Arcila"; group = "Computational"; - hosts = [ "hut" "fox" ]; + hosts = [ "hut" "tent" "fox" ]; hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" -- 2.49.0 From 766da210976f95cca41f920f78e0bbfaff3570ff Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Jun 2025 18:27:56 +0200 Subject: [PATCH 340/472] Add software RAID in tent using 3 disks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/tent/configuration.nix | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 1365a0d..9e6a6a2 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ 
-36,4 +36,18 @@ port = 9002; listenAddress = "127.0.0.1"; }; + + boot.swraid = { + enable = true; + mdadmConf = '' + DEVICE partitions + ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d + MAILADDR root + ''; + }; + + fileSystems."/vault" = { + device = "/dev/disk/by-label/vault"; + fsType = "ext4"; + }; } -- 2.49.0 From df67b6cd267eceb8858766955f0ba9e0254267f7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 3 Jun 2025 19:07:43 +0200 Subject: [PATCH 341/472] Create directories in /vault/home for tent users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/tent/configuration.nix | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 9e6a6a2..f31552a 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -1,4 +1,4 @@ -{ config, pkgs, ... }: +{ config, pkgs, lib, ... }: { imports = [ @@ -50,4 +50,21 @@ device = "/dev/disk/by-label/vault"; fsType = "ext4"; }; + + # Make a /vault/$USER directory for each user. 
+ systemd.services.create-vault-dirs = let + # Take only normal users in tent + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "local-fs.target" ]; + after = [ "local-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; } -- 2.49.0 From 253426ce00a3a3c5078236af687830c1e1aa094c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 5 Jun 2025 11:07:00 +0200 Subject: [PATCH 342/472] Add tent host key and admin keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- keys.nix | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/keys.nix b/keys.nix index 83b8ff4..ad8e304 100644 --- a/keys.nix +++ b/keys.nix @@ -10,6 +10,7 @@ rec { bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; + tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; }; hostGroup = with hosts; rec { @@ -25,7 +26,8 @@ rec { }; admins = { - rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; - root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; + "rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; + "rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent"; + root = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; }; } -- 2.49.0 From 5223ea53f6ea8dddc86516e9c73077683db591a6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 5 Jun 2025 11:09:15 +0200 Subject: [PATCH 343/472] Rekey secrets with tent keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- secrets/ceph-user.age | Bin 913 -> 1023 bytes secrets/gitea-runner-token.age | Bin 479 -> 589 bytes secrets/gitlab-bsc-docker-token.age | Bin 519 -> 629 bytes secrets/gitlab-runner-docker-token.age | 18 ++++++++++-------- secrets/gitlab-runner-shell-token.age | Bin 516 -> 626 bytes secrets/ipmi.yml.age | Bin 1184 -> 1294 bytes secrets/jungle-robot-password.age | Bin 477 -> 587 bytes secrets/munge-key.age | Bin 1896 -> 2006 bytes secrets/nix-serve.age | Bin 534 -> 644 bytes 9 files changed, 10 insertions(+), 8 deletions(-) diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age index c37e0f4727564e981f6c1b8c6e404d092f3034ac..1ca264b08b349dc20b50b7b679d1818d1eebaf59 100644 GIT binary patch delta 935 zcmbQp{-1qftM^R9Pc7?fLR)lw$OJZtbj$61}zILERm9Jw;ReqLPUWijg zxq-KDrg4@lS6)uCYgmX&NUC9OKx(j|nSP0-dt^{vaBz06scTk2YNk_EfTy#*QJO{Q z#E;@(el7*3Wud7N#->FsVdfStZh@JWX~oIrh6RCsVXhXDfrh~)&IQ_$St03MzA2%O z;T|c5*=Am;DMkea=HbaD`Y!2iX2BVS5pLyv6#(hb7$w3}464GyoGePr z!!5l-6AS!{^NNeg3LLY{ebRj`tAY(Goy#nO1M}Pr(k-|W9Rm`>gF;-LTm#c{owI}7 z%`E*&!=v0B6T=cauPcGWU#fFBcy#PsdOTr-Cpu_xb<}m%J*Yg0c`t z-_o>5lQNUMvZ5j*f6K~pt_;7ZC^whLP~U(+KVMVr!sKMr#3Il1!oYHafQZt=ocz7Q(u^Dnw2cev zJ%S@kB0VD0leyfJ)2qt7G7F3SDx*TevJH)*4BTAv3Y|=R($fMBE3&);%Og@s@(n$* ziqWm}4R$k5Nms}ZjkNU4atSia$TTcWDsm1ptupmYj?A(!P4+D)sPMNm$t=uvF*9&2 zFXu{js&aEL%BauK&JNG=%rwZ!DK`o#baW0iEJ!qu2(ipA49^Zub&RwKbNA%Z)zwuf zGOR#Tb VoFA0+v~{|O_h$Y?KN(rcQ~=4dHAMgb delta 824 zcmey*K9PNbPJM($Nou%PpkKaWaE5_Pu2-c;zKcg*a!Hw!X{c9mXl99%k84=DbCpY0 zF_%$Da;aN-RZf^sSb$Shx>s;sa709rWvWYQv1dk_pR2o9MU-Pjq@j~vK9{bYLUD11 
zZfc5=si~o*LUFodVy>ftuXC|qSfQzpS87s@v!SP{ghD$_Nptpr7$w5JLPL_04Gf(u zT*{1d3`_#jyaNhCeO$c?(#!laJ+rdQO3J+Sy^VY#a`L$ntIDLesp9jf+eoD=qRv!vZ2b&C4gBWfZUXwG0gPO^Gbct4j9`E%0+U(RXol zj?hlD$So^QD-FyGGRREMb#*F856kBY%E$<*C~^0V^z?UjN=`4x&Z+XwG4gcGbIdV! z&(8Gyq(B!>mz1>1)Z#SPfcmuZkf>6(!jeeO zoa~%%pAs_T+FgOqC#8^jXd0=TvJLN%^d^k zlij?WT#L;zBDpGjJTi>(jT8Mc@TG>9JyX8 z>|3F0_G78K=I(+bQA0CDx1fi|Zv0<+*udnQ+^)AKc1oGEVt;rnd(NZ%@;tMRiu*RFiw~&4A%-eqpJfQ@(bdV|GPG zAeX;Ka%z&3d3Jt)cSfbDg`-(vUb<_TUwCqMcDQp!rK`7bN_l#vg{zNgK9{bYLUD11 zZfc5=si~o*g0o||yGfuzWQD$mk7rS3sGqBmQ(|FAPQ8VRTajN-rAdU3bEHv7PE3CMC5fJqzWK&!`A&wWk>!QumL8EV zkzCG>rUj)D-ui|fF8-M%jz$q4PM*g8Dc-qJ=1x@}#U&;#m8IeCzRqSDt`k3s*J~U3 zq?V+3W_m`2W}BCnc=!bymP8i&rWd4}n+F9JoD&$A%mj`MGWkvZLIp=yu zl_UikRQiWSnWmK)X*=eWm1PH&Mi_>=q&fRJI&#HZB(S=#v~@OjsNhlSUlik3vR(7X z&gU&RlLJy`99DgGZEti{gs#EiXu*YV7YeblpWL+fb4k_gN!F?lPraOaq3)1}!Kxe0 OS^@$QE;p<{GyniMfw;c_ delta 426 zcmX@ha-VsEPJM}cm|uyXOO#tp4 z)`&&s@jw8-2tvvM!5!c3F!P*>kjcT=N$uE2^E z$4H-i|3uF;%SxXRKU0I8BtP?fL$}-zbEjbEY>T{{{2)htQy(*5E?r$+g^)C7?cy-Q zeEl*nr_8`CPjCICT&K#&aF?QFV>8d90RQ5kh+q?K&!|clSFVRUZ?Q&hI5aKaP$Z8b z(QcKGL#^wK+i7#Z-ZSO&`0o4Y`NUb%kA|9BP5#g(?yuDLgL774xO7Oxm0tDtFSi~| WYnb^Yu3h8G?1$%s_kNUYUIhT?+?Q+s diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age index f3798053320023b4a5d66b7a6ad3391f7cc488e9..4650f21d7a6f53ae51f58f9e79794e70519a631a 100644 GIT binary patch delta 577 zcmZo?`N}dur@q`j)V->}uh1;m*eSFkt-L7ASlcAWFD%_9BQrH9DkM84Ak4@+Bs(#^ zm@C)UDYeSOtI{mPtfE-oBiqQw(#6ELC@dhYFwZOf$pEu*|YR|3v@%JRb|wfC7EfVspcwWRKMF zNUkDJv%E0R@Dxk!%%n2k0JDJHZ13EPuyWthL?``hLqqrERL7jqfKayp&xs$!>n$y+ zTuj{p3o6__tBSQf{f%=iT@wo;{mqh#N|MvVEAvdlbE6`hij$oKxqQl9T|9CP^CC=3 zgZ(WGbD}Ck1I_%+Ga~YgoJ`9LOu~~g{L`F6d_%J=xO8=O6@qi!A}w;F0>etfi;K(s z%uP#-eRI;?jZz%VG7Td9vb{~S>n$U~{7OTM^SM^Vzf^hXdizc56Z7=rj57M$PV_`A z+pUmtzJ%qJM)`8vQ*7VDs~nF%W$0wCGGDN3zK=osRU-z`6?FjCtw&)eDDJgX?(uh22q(9xwJqqHckG{fJ-FQ_OjvozEt znJd#d&DXcU-J>!w(!;&dEIHGnxG*cUBsn0pB-JOUvdqWb)y>>8EWqD2nM>DBp}06h 
zH#Nn`)YQ;Y!Pzm~-6T-K%)i*ZD9=}0KiM=V#LvPo$gQlv(9FzRyD~jD*wZaFII=3D zA~M~`)Z5*a%dyO`Dx}<~%G<=fG_9&6(=t0Su`<=%+1Sj;-8s`GD9EGAxzNcc$k;f2 zay+AWy@j(!p_h5Ct4mo{Sz%JHn|7+JnQN|Lc&1sZhhvp%XhEiHg|lILu6eKlS5iuO zRisC`agbwDibtV#P;sVPXn?kMaiFP%Sy*UJaGqn8qkl$bU}%;nm#(g^Lb{`AL~v+Y zR;W{cj-|0hv9qa5QfQh*Kv`g@XOU5XUv{ujeV&h{slS_LGS}aU?_3jw4c*PRcimkX zEf}=4A*Q-c#XbK+a^ReD?*O$;yYG~*o_*X*LNbMkCR)%mvGh_T2G)cj-w_9!q{U+q(_(cIrp$J5(TABB8Fn@s^4K Mb8^1z ssh-ed25519 HY2yRg pXNTB/ailRwSEJG1pXvrzzpz5HqkDZdWVWnOH7JGeQ4 -NzA+2fxfkNRy/u+Zq96A02K1Vxy0ETYZjMkDVTKyCY8 --> ssh-ed25519 CAWG4Q 7CLJWn+EAxoWDduXaOSrHaBFHQ4GIpYP/62FFTj3ZTI -vSYV1pQg2qI2ngCzM0nCZAnqdz1tbT4hM5m+/TyGU2c --> ssh-ed25519 MSF3dg Akmp4NcZcDuaYHta/Vej6zulNSrAOCd5lmSV+OiBGC4 -qTxqVzTyywur+GjtUQdbaIUdH1fqCqPe6qPf8iHRa4w ---- uCKNqD1TmZZThOzlpsecBKx/k+noIWhCVMr/pzNwBr8 -r'Ƌs4˺AĥPL7` ) H-0AH5LQeH2bB޲CJG"-S\ H ssh-ed25519 HY2yRg 6C5Cv7ILdBrpMkCTT/insUY0kyQWbfgU500Ai8ePOXY +tMw6ehFrsq2dvDEXkLOJwrNZfI28trlr9uy3xW/fzpA +-> ssh-ed25519 CAWG4Q x/j+364IYURgt7fhIPBzabbWMEg08nX8MRrJM/1Q6RU +AL5Ut2rDr3UXcQXMZJ53ZMf5wMHmT83whx0ntJfW/WU +-> ssh-ed25519 xA739A QjXftBsoGV1rVeHSKcsjp+HMpRVsaHOeeGdDcF6ZWg4 +ovVoYPaPn3liGPAxHWY37CBIUFjAXurv6jMWs2He3HQ +-> ssh-ed25519 MSF3dg FG0CQOj9fRlneW5QrWiy5ksRpicUwHqX9QMpZWhDImw +L20n1vZRepsRPT4xM6TO6PcI/MJxw4mBLUF0EPv9Uhs +--- DEi7iuzkniq0JPatJ5f2KhrhxWid7ojHpvNfUCGxFtk +% n!;^QrqG:jC.8l|o$LYyNb:{fާxTS\tU\F)%KL㙇p:>&) Q1H܃VSޑn \ No newline at end of file diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age index 9eb02d59ae5112ca6001fcbb4d2adb63a578a740..d35f5340cb0496bce82e3e40ba7d155c6f04c52c 100644 GIT binary patch delta 573 zcmZo+`NT3or@kn?sLaH#IM>)MInlV%Gte{8(_KH&rP#PE&oA5Az_`TGsXX5)x7;Tr zolDy%%{)8E)jhYkJjFMx)ZL>ZEYmzcpu*ccDknHE#iYb3Gq@tcB_}J_mrK`9p}06h zH#Nn`)YQ;Y!Pzm~-6T-K-8`kB(8MIGtV+Ae!__GyxZWqpt2i?`)z{V1I6Si;CqFf) z#7sZL-`FslE6*~_B*)yv)v-Xo!ZfME(%&p4F~Y+=s>H%P*v;3|F(NV5EYLsSIMgB( z-MR`#b7M#HJj z{QR|ja`VdbGKqhYJxWZ%SV-$-XAz07iwQ= 
ze6A-pN$9zesf+zC0iOpiXZ1Jp$_P50lr`C>#u4H+<*~rM6rKFpW!9Cv*R-t!4u_bp z6uG6fb*`OmMP=?HrM8*DmyK0KR=3SJl>7O5#ai!|eqtO9jz%@I>-!;X(oJ-eEp}06h zH#Nn`)YQ;Y!Pzm~-6T*U#3?(apu!^C!rRcVEZn#}*{v+m%{)KX+pXNy%gCi7+^fL2 z#M3_{tK8X<%O^Q9Jv=D0!pPV$Ft6OeKhQkQLfazP*vrDi*(207C&IAIJ1W>UB-6)r zay+AWeRx%on?-h3NPxLhM1)ILZfJpdc7U^YdTB|9W4NJFP(Z#}Sfx>rZ*GPOmxZ%& zZir=eNok66h@+oNMMkAxaaCw`QmTu)S%gWszgJ3yg^{VYc1ol#m#(g^f_}MiWS~)K zYDlJ;xrd=wkb!$~N2^>P&~$sDtk+mTh$O z$a|=I=Q_`M=2J`l>xXY!cVr6xqMRSkr#-Dbrp7Vt&9rStG^QQ8;Jxbe&ESnk^S4*6 Ju$7;m0s!*Cr)U5G diff --git a/secrets/ipmi.yml.age b/secrets/ipmi.yml.age index 0240478155d4e35f1b1d58c328dff89f63e38fc2..e445aaed3f2865c4dac62c68d45c607eb37fea4b 100644 GIT binary patch delta 1247 zcmZ3$*~c|Or@ks6%hSloB_blHqA07XT-(Im&!ou0yez0l-#snA&@tW1Fi_t(sj@Q1 zf-9{w$vCLAG&JATFCw+lH?1hW+#@0?E7>Qq(ygS(Jk30`)XU8{#VuIdkxSQ3p}06h zH#Nn`)YQ;Y!Pzm~-6T+<#L_k0GAuH{Q9H%VGBG4Sr@l18!ZgXyBeW{bEIG&{Io&KO z!#&T-t-6>$TI2 zvnoBE+*~q!(o!Qmz5IfT-3&a#O>&L2%MBc>k_^L)_5A{Ji?l1sxr{1Z_0t^-T#a*c zy$ef%GRmql-2Jqj!_uo#y!9=MB8v6BQ_YNXEyDaPxO8=O74-Ej95W4aiv0|;BP+~< z!b*$_!<>!$&7%C$GxE(%yqCl-Q++DJLb=q|$4=%n(t6(Z<C1e-dGS0`R<7fgkiDO@sd{3??Wy_REyce2YmZHAxz{K0VY5T4!u&ZK zc2AN&RFxcjJ7(67=%*QbBwOt^H8E((J-?-Bv7v9D_B}qEMWyuy#lllQuyZ7|7IAMr z_2a*xegJrNfRe#cPuU`Q|vhKU0Sgr zU0%M%f`46-`lTM*FAV`k(RJaon7#&7R4=`5nE!71qnUwB&y`CyY>SzbuX3eQQ?oA9 z+)$%OYfk0W`pOu-X;(}RNQIxvw?9!TTAow6ZF_|L?_Y`Uqia>?IQ5Ax+IwZ*{s#g} zHyG}YlB>9JfbYvi@1N$c-&M43V&c`al6+!aEwem~_vl|AzVI)p|IfKebY>pwSUT;* z4AK3s9{<|q8?M@LX4!t1^mj=!S5*Guoh9M@zqq%V{cEd^^2zP>lYZqT=InKl7rh~2 z`F2~qzxDe7r!5uLYdyt#%-^SJCeBIMn&WERY54N-COLk|jva^h9F6+BMxk{6qtHcN z_YQLZe^AVvnZMeiEc$(Vq|Ng+vBBB)xw&P_ie_A>(tj#$c{k_#$BnvuS|K^#L_&-e zg$^)&liw8~qCQ=1<+rsn7S<>Hy#D>a(z(vUUXPcSm)`GwapK^sZA<2V`g*@8Yu8&z zw!}cD!}4bxjqGoJ-Thx#_eY*&<4G2+mxsdCL%im7-=8=w?_2NzyLWT;3#7NM{W0ZV zsrJMApH~-Lt5#V!wN>YRhx65J57$!@9*Xm5PmZkLJ72JW14D$JgX;IGpGuD3ek#9d kW1;4U#ACmEuV>B*miLHE`f|}iocDpqhjaPgqNZd30P$8JaR2}S delta 1137 zcmeCCo9m^z}-AtyUJYOE8WP_q{ye-t<1&9t-#FH 
zkt-<2JtI6fywuMzC%Lk!I5#md!rRg?!`#!Tq&zjOD#unh_ioYc3752cBHFio?}^>SC(NUS3#7K zmw#oVyHBW3SX5+&M|qg1xu-#{fxCsTce!72a)Dt|kY!OoKw^L|m#(g^LatL#SaNuI zdPs#|hG|Ned1R(vL|STYmRDeExPP!yhG~#NeYkN{s7H!pK3C7BLQe*PdrW?bUu)HU z;_vU}o1ptf`L)TMv(qjX#&>x=;trA)e67*X_H2FQwpNZ6b8gtW_N|M&{`Me~2GjMa z(vu8(tt?lB=x*O}j{m>=pElJX`9#A6g{165*@KoORASoP~EFYqmy`aG58kL8DpT=j{fK&V0vT-~XCr zobq|+*MnUK3p~42e}BGk>t#&qw!G&P0=YlTtQLB8wC>~l`r^+8htje|d^;_^DV3Fe z-};gxWPzF7+Sr`<+c~cCjc0iGul}GTHeHyb@2FWr>@Fn^!Jnm#>mPQQtTpO;8^WI6 zr*@W?Vfq8D22?-FB`{ zEL-1Tu=3Ev@8+9t-l}idqrd5))9S_O)OhsH($)7w zGKEdEA29uj3|cdN*OSo1qCbvHPq-U#R6aj?Ed0kmw}Kv%MP__AGFJ3uf4Mz9T;)@U zLPo(1o^AiPtn@C<^ycznpID?*m!HzKE;Ls2@usO6jMofeX8$d(6j*<(K7Og?UGa$@ zRSiuu->i_^tYk6&>U2Zve;GeSmd^fX(=x4_N$82Dn?(`#wk>hH-X7{?pMK7Ao_N=k zK&yp+l9brpYo`_YK6q5*zSKeRwfUl{eS$XemG=s_6bbFTcOXvXv{I#wmFW4joz|`w zqddO`|F3y&?q{@S`o`HhwiA21FPy6nb+n2v%ikZUB$3 zbD!f?O%^$u^;dGg&u?hw?owZxr7G@wblTVHIk!2l9rpQkkH`G=KIJpZ7(#b%|Lw+@ zb!NwT?jI^qUwCG#9}}Ns6mj&B;>lTVpV>CEuzMerYRYKo3x*3``|H&a?GrzcKK nnJ1qS%X(49)4y?fx#r{MeTU_8r$psePt$WLUE-kdWuh$r@Pg^H diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age index 3e7053e43ff829c030a2e63a36e7b764bf5a5324..af68a1b2dbc1f8ac9bb13b277a4174375cfdf0ea 100644 GIT binary patch delta 534 zcmcc1e41r~PQ6!Prg3FvRbFC-yP=0+WwC{6P>MlbdcM1HK~+&jiBooYd8T%xv$lJ1 zI#-xSdSXUJMW%mNZkl0bx_(NQNn}Y>fqSt@dO(Otv7w23hGB?)WT~%rB$uw8LUD11 zZfc5=si~o*g0o||yGfveOK`ZMX{td{szWXuX$vk+)%nwqc}MMnH*^QFdxs zqCs+{nX9WKm!WZRuD4-Ha++yUfoWlcPqvY_tGi=TR8c@wL9%vanOAUDRiU{_WJ$OM zx^)$f=Ejze3PEAn79rtY#bus(+UB7qt~7p~a>8*=8ZRW@e5if$pZJ1`|Ju*PA5< zltt$Fg!qKJhm~8J<>ioDx{X?m8XVjdzny)J(v03tr!O1&XsRTeZ=d)%W`l{fuHhl?j-I^_l-Fs>G)_}pS1URBmuQKS Lh)Z?+DdQIa+48lY delta 424 zcmX@ja+i66PQ9CBQL$5Am_@#ibGUPWb8uQ|l%=stRZ4idv#GXWU`VlNSblItMu>%x zCs%Sxa)fhgU~;8$%9fnTUc zs-b06X|8cGmx-}SSfRPOk%w`Zk6V_9L3vQVSEON?uX~t_Q>2keiJQM=PQHJrX>LaL z^ob(){lH3qu^TY`Efbf9ylC+Sd0{6t6bU!you3{%& zKLZzc6W`RFNW)6k3jh2-)2v|2u#BWgvn*#nH$#_-tSqw%?=sh9E?r$+1@lrbH*@!( 
zLVe#7L+5g@r1Ja#Ba@)ivMR5jq};$z#{f_FFqdqvGRNYibT02e`O8nwy}TFV9xIga z_UsMidY#Kj#cNs3QIph2!rR#r}E zI+sa6s#|EePg-PlR&ZvZWtN-2K}C9*U%GE%URGeSTV$kJiFTn`q=}<;AeXM4LUD11 zZfc5=si~o*LUFodVy>ftPnJt)s!O74goOHzopzj3ltSZ--hU~YI>YGy%JS(sOmPgF`#R%voTM!spKUxr`) z#E;_P$q{*BVO1WEK|vOQkrl3$=|$$1rg_B$Q3cM%6_w_NK6%DL6{S)Bl|i9gNj}*{ zk>*u7rjaHFnL*jUrsbv;zRsSx&iWPJ7EwuQ9zmWFc_Dd$!A1s?;~B-nLkx;@EJKPb zy+YG`oysiSlTE`b4Z^%~%tLdsy-K~^e2Xgr%)E_LOES{Aa>BKXb4r4JD@`+;Tusvq zbM&2Dz0-dB5 zFiy|S2udq0izqWLaLh3ZHOuv?@CxrZGLQ}V-3Uv%p#9L$Ux>N`q4?U0u2KL-Ngw^1O|Ty#l-oon1lUO@}pb~j8Y2B zvoX?_Z?KzjO1eTokV~kgUx9Z}Vv&1zp>ug)g>z)SUqpFgsCkudd75KkqE~UbwzpGy zWj>d_iBV`$xo>@`v2m1tm7jiLimQifxUq3|NQQrixly^7N057zbFx#3b9y9~uCA_v zbCqk7hlz7xkg;i0XmN3NdQg(1nZI#(S(3K4wyBYCxsh2;Qe>EZo>Mv3&Agj0Cirb^ z_y2Aq;q>T=VRqMx12%l=4EIBO3Mv?WtdX>+^jy7JGjzIseQw(P3em=1g$tr7mnJt( z){|JB;Br3mCUfXgl!yFps#$*X9Fk{ejB6d<2+Z%)IJYnrv|Aa>E-J|Jj>g`pEkx3EuxI zb&bdV!OYcjkFW^sT&BD4ex&BxOR@@oeo4B??24Oz-avz)Z?cu>qO?^3#h>ffzIe9r z)Wr!mU;j;OR~;} z{{E3DlXK5Tm1p!;_}tEck`rOdbx`zrl2(*bL_LPw{+ z3h%4-#C`a2=XI{-=Ij%S3rmX*-EXnm{iB|}i9IM{wfqjTFM+(#DqP<0v@iTCi^yv= z>u7qb`Q-L#&b`46hFe_3Uc?j}D4+45JyzSGOK^&GDxbFCvMfc%%ZJP7gXIIOe zP(6#*$qHXh6}4I3{0IqFZpy#@GdC)C>(7S|4&ONIylsX0Ib(^ABNunR*vr`1@aMYN z>7E5A-{0BlyML|B%{R6&Yp35{|Ap((+NqPSYFv)=C@tsU}n`iwtkN)y(#2mh6~ zd(d@xf`l%|bDjV@#xwQh9G-b*^JOmYzns0u`+AnL*P%9jLq`c)r!U#!xf^^=2mD$n z`j-EyY0tCOsxG3X8k^Pc*DW!SdNk#4p3mc|@buPI5vo)5t0w3~7FBNfzQ{7G@sXEjxQW8I>)PxU1ZaC@C&-6sVg(p-j0@)TQNiW z(7cH&+aJFy=Cd!@Cb-+{Rd4WZ*KqGYskQQF7PCIFm{uY>cWc3u2;XMzw_m;2czyLV zNnNM8GhmmPskEyw18>LM_^6N?!GDJD1Q#Z|&vscZ>r_3z-{HZ(xK--=8yyzfA8~o} Y;e2d?jqe^RhVy3P=QlOSekQws&{x$X;QX9 zK9^s)c|b;pOKPx5QC^6#p=VTfRzPUEet4osxvOu6uX%_`iiK}@j-h{YGMBEMLUD11 zZfc5=si~o*LUFodVy>ftk#nSBNM*50slTCtbErvLqFY5~VwHJTc%gS%NOD1nS&o;F zOK@h9mtl4~SFoe2WvY*Np?Ru%T0~K@e|V{OQbBQLaj3SVsY{h>X<<-6RY_!?bEHS{ z#E;_PC6+}lp_V2E!9gAtzUd{-A!gaB5h+zAE)l`zrY7EHQO2c(c}~gZA(h!&p)N+r zVJ5~Ix!!q!!DafnX2JfBm7#%|j>$n~DcZSd<>6sYnV|;RMJ}F`;~B-nE&R$#Jwo!e 
z^ZY}MovT7p9Yc$qD?D>tyd%>*9mBj`O3ZWf5}hJl%t8XWvckV1vk%jIw0Eur#OASTgWZf%(iKv@yvieU{enu}Ju4#I)0`dM^}W;5vqH?0wF}C$i>tIN z>%B|U%stF1!d$sLlifq30{l~|yhB5ay?sm5jY7?H^aHEROr7(*(_Jz>i?nkRQ!>J{ zjf=T-b#)b-48j95+%qim0s?}=%nZ^zObT)>68$|Qqx>u@3=DIsDtszZ{Bp8f3W~X& z{<-t1Gvw=0<9?$xZx+4_I`RC6){mSDw&?mVsmDDx^5joCdUi^IibTu?N3Pj3)i{d- zmCxtZZ)M+*Gof;o#H5fjHfxf)E7C1?h!l8<?ht~Ffcd1p%ct0Nyig--kw7xZ*lW3I=mgV$v`RyURQ8?NN` z6;)+^THm`hjp3iWl(KX3wv(RW<&GOw^0%v8OAvn{75jf(eYUKTsp{63vH92gIjxqS z@{VzkQr8#ns7PiL3yhN&{JgsDo%NrS6*oWleqU#z6x6YH!;w1{zBzkW&AG%_ZDwQq z(tC1PSjr~#JI3z%i~c{BHg_&XQ1T!hoi`&yN-ojc)Pxz+8xd+8?;Lz-X>FD;#~czExR^lY>J3|Y~Fk$aL%^tfdxD#qjnu; z)hYgGxa(J0^mI?pNAJ71 zjQ%fKas19txvy3SAC*4QGgslCDl=)zx4GRDPfajC)%kDMzLzZ1H{QFy_Q?FgpLxfx znA8UeNmv~VWeS`BO5$(JKJg!oXP1fI_CE2&?Vjer>8z7J<^6igvFYfKpnAy}c6(MA zKELnlWfb_h>rEt=?`_>Dr_F3`zp(swYpSHSnfG3^i&L!TD&G%xSZZ_q%0ur78q^S^iHOIhySL((m3EArl>=^>5 zxaoyX-t|5+F#AaCDn+l#&iO{?_hu;7uYDAnZhhuO+Oe65i>C2L#RuQLVg7Q}($j^3 z88Q844Fy?ICDRXoO}A;g9Jh0S-2QFfQg*Wl9)5l^{Rsc+NzGkPc)qni`>DYen?Pd z{$F|9aFgJ>GzN32Bsu+$&y9rj6@*r1J71R-Kj~2!;TJY<`wORt&2~e1Xh^3N2GU5s4&^${3GQb%l{r1%N>t)PPuy4cx}fco#r^>2WKaFUih`@b)Jo( zFzY+pwBsv8{Z&}RryA$4`?hY~juidhDz9f8(S7>(#r|ps(S&|RBc0<@miVt;FIubh ieO-@$%!6Niua5?;s^Hh(@%6@w8g2F5!;AQmmAe4~EDP}f diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age index a498056fcc84bf41240facabae6993a82932b2a8..9ba631a91a1d17f65e7d738f4d52ed48233454a9 100644 GIT binary patch delta 592 zcmbQn(!x4Hr#`?dEK|GGu_P=q!XVJaH`Lu!-?PBdzo@JrG%?vFBeyU-SwGJ>-6!1G zfXg-8(XH6g%)6q(-z>>7u`DRK(n~)r(Jdq}FQPEnQ(rsCFv_tst1Q<&kW1H2p}06h zH#Nn`)YQ;Y!Pzm~-6T-Kz^l+V$um5(G{hh~S>H4xr9L#pB-13Yz{D`z(5E0V(*%Eh` za-u@RN~01z9rNuEXxeaGt$FSEnSRr3JZda+;d$$xxD;b%<@W%g2LTH zOpVh63-pWfON^2t{0nl8lTGsU!vg|La!kFw^vm7yxpZ}P74)5ror^*;Ba2g`!paK_ z{7g!GgZ(odOHC|&3-rCrDns*(>w`iI(sB#)lezBBG)RuSmK4FMXP$Ig?)aQo=d=rv zP8WokpY!u|S08d-cY5&guWRvv!|w+?{nP^U6Pg V$JHO^uNB>FcDO72sdoMjVE}~n*N*@I delta 482 zcmZo+oyIajr#><)L_0q#&D_|dEUh@nOFvh?#LOc+G{Yy)vZBl{B_q<%H6)}k#jwCA 
zkju?K%`~Oh$ucY@O}io_%CRJ{tjtuq!p$YhqtLsw(zPJR%egSD(o#RklS|i5p}06h zH#Nn`)YQ;Y!Pzm~-6T*Uv)sfhCBw5QJIk!9$|cgp!p+UxEM2>@G$X$(Co0Rs%Oo@1 z%P}{sBr>p=D=RG3+1)a+Fx$f=HQXaAKhi(cqsY%Kzo0Ct+%&Y@KSDdqARySn#o4oX zay+AWeQFMP!muW<+7AvAJn-Qc7m3hns(~zq5x4mwtt@ zTfT2lQHhC}sh_udNN#GlXQ{cVOKDbEcCxpTWqPuKzDZIOzlT%_+P^7bCd4+yzy`N)Il~-zBGS?H{o&{GP7$0TT-(cBT zlTb9lj=OX2QX}0QwV1E=5&JiKttoz?$$jsAEYoVEwH|w=PM)5@SF&6zH1j5dwA>_t ztnaUrR?Is(b8X_HaOT{vKaXvyzqZ^{hj(UFn7z*9%X1?t`Il7j_b-^Zc_QzkKWfT; cA`MZleL4chEOAeaRp*MaCd+>6`M1RY0QKgr*Z=?k -- 2.49.0 From d0fd8cde460b0376f797b3d50c809c23feb118e9 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Sat, 7 Jun 2025 17:51:40 +0200 Subject: [PATCH 344/472] Disable nix garbage collector in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Rodrigo Arias Mallo --- m/tent/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index f31552a..07b9244 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -67,4 +67,7 @@ wantedBy = [ "multi-user.target" ]; serviceConfig.ExecStart = script; }; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; } -- 2.49.0 From 61e6d3232b45bdf14811d042feccb0ff966fd710 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 10:32:31 +0200 Subject: [PATCH 345/472] Add monitoring in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/blackbox.yml | 13 +++ m/tent/configuration.nix | 1 + m/tent/monitoring.nix | 187 +++++++++++++++++++++++++++++++++++++++ secrets/ipmi.yml.age | Bin 1294 -> 1563 bytes secrets/secrets.nix | 3 +- 5 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 m/tent/blackbox.yml create mode 100644 m/tent/monitoring.nix diff --git a/m/tent/blackbox.yml b/m/tent/blackbox.yml new 
file mode 100644 index 0000000..d716359 --- /dev/null +++ b/m/tent/blackbox.yml @@ -0,0 +1,13 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + follow_redirects: true + valid_status_codes: [] # Defaults to 2xx + method: GET + icmp: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 07b9244..1495755 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -6,6 +6,7 @@ ../module/emulation.nix ../module/debuginfod.nix ../module/ssh-hut-extern.nix + ./monitoring.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix new file mode 100644 index 0000000..e8b38dc --- /dev/null +++ b/m/tent/monitoring.nix @@ -0,0 +1,187 @@ +{ config, lib, pkgs, ... }: + +{ + imports = [ + ../module/meteocat-exporter.nix + ../module/upc-qaire-exporter.nix + ]; + + services.grafana = { + enable = true; + settings = { + server = { + domain = "localhost"; + #domain = "jungle.bsc.es"; + #root_url = "%(protocol)s://%(domain)s/grafana"; + #serve_from_sub_path = true; + http_port = 2342; + http_addr = "127.0.0.1"; + }; + feature_toggles.publicDashboards = true; + "auth.anonymous".enabled = true; + log.level = "warn"; + }; + }; + + services.prometheus = { + enable = true; + port = 9001; + retentionTime = "5y"; + listenAddress = "127.0.0.1"; + }; + + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only"; + + # Credentials for IPMI exporter + age.secrets.ipmiYml = { + file = ../../secrets/ipmi.yml.age; + owner = "ipmi-exporter"; + }; + + # Create an IPMI group and assign the ipmi0 device + users.groups.ipmi = {}; + services.udev.extraRules = '' + SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660" + ''; + + # Add a new 
ipmi-exporter user that can read the ipmi0 device + users.users.ipmi-exporter = { + isSystemUser = true; + group = "ipmi"; + }; + + # Disable dynamic user so we have the ipmi-exporter user available for the credentials + systemd.services.prometheus-ipmi-exporter.serviceConfig = { + DynamicUser = lib.mkForce false; + PrivateDevices = lib.mkForce false; + User = lib.mkForce "ipmi-exporter"; + Group = lib.mkForce "ipmi"; + RestrictNamespaces = lib.mkForce false; + # Fake uid to 0 so it shuts up + ExecStart = let + cfg = config.services.prometheus.exporters.ipmi; + in lib.mkForce (lib.concatStringsSep " " ([ + "${pkgs.util-linux}/bin/unshare --map-user 0" + "${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter" + "--web.listen-address ${cfg.listenAddress}:${toString cfg.port}" + "--config.file ${lib.escapeShellArg cfg.configFile}" + ] ++ cfg.extraFlags)); + }; + + services.prometheus = { + exporters = { + ipmi = { + enable = true; + configFile = config.age.secrets.ipmiYml.path; + #extraFlags = [ "--log.level=debug" ]; + listenAddress = "127.0.0.1"; + }; + node = { + enable = true; + enabledCollectors = [ "logind" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; + blackbox = { + enable = true; + listenAddress = "127.0.0.1"; + configFile = ./blackbox.yml; + }; + }; + + scrapeConfigs = [ + { + job_name = "local"; + static_configs = [{ + targets = [ + "127.0.0.1:9002" # Node exporter + #"127.0.0.1:9115" # Blackbox exporter + "127.0.0.1:9290" # IPMI exporter for local node + "127.0.0.1:9928" # UPC Qaire custom exporter + "127.0.0.1:9929" # Meteocat custom exporter + ]; + }]; + } + { + job_name = "blackbox-http"; + metrics_path = "/probe"; + params = { module = [ "http_2xx" ]; }; + static_configs = [{ + targets = [ + "https://www.google.com/robots.txt" + "https://pm.bsc.es/" + "https://pm.bsc.es/gitlab/" + "https://jungle.bsc.es/" + "https://gitlab.bsc.es/" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + 
source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:9115"; + } + ]; + } + { + job_name = "blackbox-icmp"; + metrics_path = "/probe"; + params = { module = [ "icmp" ]; }; + static_configs = [{ + targets = [ + "1.1.1.1" + "8.8.8.8" + "ssfhead" + "raccoon" + "anella-bsc.cesca.cat" + "upc-anella.cesca.cat" + "fox.ac.upc.edu" + "arenys5.ac.upc.edu" + "arenys0-2.ac.upc.edu" + "epi01.bsc.es" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:9115"; + } + ]; + } + { + job_name = "ipmi-raccoon"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9290" ]; } + ]; + params = { + target = [ "raccoon-ipmi" ]; + module = [ "raccoon" ]; + }; + } + ]; + }; +} diff --git a/secrets/ipmi.yml.age b/secrets/ipmi.yml.age index e445aaed3f2865c4dac62c68d45c607eb37fea4b..02d1218774bb6ebfcbdcf14b93c3f410e9efa2b4 100644 GIT binary patch delta 1499 zcmeC(9$d~&9$s5!XPLpGtV<6*VEKBE!Wi0 zm&-86FFUEkEy}>W$}~LK*;C)UGRQJhJ1{w=JlD&`sGuOjIX$(qs>&lTluOr6p}06h zH#Nn`)YQ;Y!QIp+!n9n$&m*tG#k;7~-(0`Q%{eA314=^kC&x32*N2olC0qLC=vRa%=B1`O zdloDx`avrx}}g1Sk5ql^P^ETbOxN6}hAohU9oT z7ACox_?3i))u&b%=jD54MRJ`!lkc^A&VO#6KWC5a={y;y^!!AzqoCr84%bZu`&<5h zs_whJd+TwTZ~PsXd_G>2?3}&!96L+j4y}k-m;Nd+EZ|h=bi{|foxaDliG1sp8_gSljUc8!O)bYNB z&)rV+M{SGlwvCc4Tu0P+&7_6fCf||q>EOThWyRa?crO{q4!VvmdP9m%gpZY(@j 
zB{%DdN|Nu>+JkeQ5{@+7aIb$L^iA<$`StTF4om#pc!T%mq@aCw548V1b@TIudq;$B zWp+0G%E@?g*Y~S>%hwwILgxG1`07l~ITrhCU!MBKUv!r4RxZy#HPNWwiDC;+#UEro z|N9K{56x}q%B`$N4O5=K+0WOpI!mk3XMK8AL|cu@y}Ld4zL@>8n*X`*GBdBMa($g^ zgP{`LA~pb(zORrMs4&WYoU*=Xz1OJjlKoBMVeUr_ zY||GwXU5ix$=<&{%VY+3R^v?mE8lGI`EGn=QZ^~K^4Z~QmbSOpl(LyGX>PrLRM2Xc z`}9PwdCrDEPv^L7eC^_sCeZmavxYs{PeI_^lvIl)905|x6*u>+Fx3UW!tDbr4Zsoeq;fu3R9{9$g>b}LOZ0UD*Cu5hwQ2t)IleumwE3LKP z-nkj0pyZG$AEEu!weGj`jBmS}*TkCEPAxrj?^bA-Ucod~*A;f}ADuj-A$Q~XQq(ygS(Jk30`)XU8{#VuIdapFhu@DfYc zbjz^F07vZ&p>w*15d-0 z;K%|~XCLQeQ)lPGRP#jRoG?o_pU5&7=lm$`s<4RU$?=Th;cg`!1#YS4X@TJeZk4V9 zF2Ui386k=JA*DXruKJZp9*+6><&NPlmARqGT%|t6nVv=I#zu*55zc0L*&%6eKBeh~ zc}Bt7i3JAcDan@Z1y1g%8A%4&lg~1W*K4O4XH|MSxw&Neq@_lBdiezvyBT-z=d7HLZdyvxEkl?dKZ=iWt3HAxcg~4hox7gcyoaOvvmD(LH5IA$8;6#E%wM^=~zg_Rf=hB+Jgn??DhXXKlkdb?yK*M}4v zr}|Wcg>tE_kDbhEr1iY*%cE_-*4}-nd*gqCNL|#j%Yi9RR!!-idTaM-d+(+D8t>=* zYHNKri;HW;w@rp^8)bY&4@6o1lITCg{r3Aqw|A$t zwC29+zh``=KWBgDciB|4lQq#NTvvQ=$+4SSFB9hMH)AD-(wF&u^Wu4?tX#(}A$vb* zQ}x7(+f(zsTZ(=4*B+bLa<5O~!)Aw8h52(f?4Bfls46-5cFe3D(N8n>NVeK-YGTlm zdwxsNVng3P?R$JSi%RPaiiM|qVCP6^E#lsM>c@XyXWzo(TCw}0gSOpX{Arr2@dyR>3My1aai1^>Dv^-DdrUm608qU*wEF?|iF zs9t*AF#p~1M>7MNo-3DZ*cLMzpH57#Kfy- zCHcgF<(euBiOQ zJ4?d*e{pX!``1<-<&)d%C;iGx%-QQ8FM315^6j>If9v-FPFpIf*LsTgn7>ccOq`Rh zHOJMs)9~fvO>+E_9Xk&1IU4nMjY8@CN1=*3S@iq#NSo(tVuQ2o zb92j<70tL%rT5vM+m_7#^!0vG)~>gbY>9zPhvm;Y8rk3cy8FMf?vFgl#*-{s zFAs&Ohj`8FzCUqV-nZZbcJJow7f5ei`(w(#QtgNJKd&yhR;{vdYOBus4(F@c9705-xGK>z>% diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 58ec2d6..30fe1dd 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -2,6 +2,7 @@ let keys = import ../keys.nix; adminsKeys = builtins.attrValues keys.admins; hut = [ keys.hosts.hut ] ++ adminsKeys; + mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins safe = keys.hostGroup.safe ++ adminsKeys; in @@ -12,7 +13,7 @@ in "gitlab-bsc-docker-token.age".publicKeys = hut; "nix-serve.age".publicKeys = hut; 
"jungle-robot-password.age".publicKeys = hut; - "ipmi.yml.age".publicKeys = hut; + "ipmi.yml.age".publicKeys = mon; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; -- 2.49.0 From 9c32e42dcccba4a967ff450814c3e7a3915bafa1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 12:47:43 +0200 Subject: [PATCH 346/472] Add nginx server in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + m/tent/nginx.nix | 54 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 m/tent/nginx.nix diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 1495755..c041683 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -7,6 +7,7 @@ ../module/debuginfod.nix ../module/ssh-hut-extern.nix ./monitoring.nix + ./nginx.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix new file mode 100644 index 0000000..f67a525 --- /dev/null +++ b/m/tent/nginx.nix @@ -0,0 +1,54 @@ +{ theFlake, pkgs, ... 
}: +let + website = pkgs.stdenv.mkDerivation { + name = "jungle-web"; + src = theFlake; + buildInputs = [ pkgs.hugo ]; + buildPhase = '' + cd web + rm -rf public/ + hugo + ''; + installPhase = '' + cp -r public $out + ''; + # Don't mess doc/ + dontFixup = true; + }; +in +{ + networking.firewall.allowedTCPPorts = [ 80 ]; + services.nginx = { + enable = true; + virtualHosts."jungle.bsc.es" = { + root = "${website}"; + listen = [ + { + addr = "0.0.0.0"; + port = 80; + } + ]; + extraConfig = '' + set_real_ip_from 127.0.0.1; + set_real_ip_from 84.88.52.107; + real_ip_recursive on; + real_ip_header X-Forwarded-For; + + location /cache { + rewrite ^/cache/(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_redirect http:// $scheme://; + } + location /grafana { + proxy_pass http://127.0.0.1:2342; + proxy_redirect http:// $scheme://; + proxy_set_header Host $host; + # Websockets + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + ''; + }; + }; +} -- 2.49.0 From d6e3d9626cc5c9c53abee0394c82370d88faf550 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 12:57:34 +0200 Subject: [PATCH 347/472] Serve Grafana from subpath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/monitoring.nix | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index e8b38dc..1bc0208 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -10,10 +10,9 @@ enable = true; settings = { server = { - domain = "localhost"; - #domain = "jungle.bsc.es"; - #root_url = "%(protocol)s://%(domain)s/grafana"; - #serve_from_sub_path = true; + domain = "jungle.bsc.es"; + root_url = "%(protocol)s://%(domain)s/grafana"; + serve_from_sub_path = true; http_port = 2342; http_addr = "127.0.0.1"; }; -- 2.49.0 From 19f734e6228a31ba8c9b5bb5ff30114db1f623a9 
Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 13:17:26 +0200 Subject: [PATCH 348/472] Enable nix cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + m/tent/nix-serve.nix | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 m/tent/nix-serve.nix diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index c041683..63f1f66 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -8,6 +8,7 @@ ../module/ssh-hut-extern.nix ./monitoring.nix ./nginx.nix + ./nix-serve.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/nix-serve.nix b/m/tent/nix-serve.nix new file mode 100644 index 0000000..35ccd72 --- /dev/null +++ b/m/tent/nix-serve.nix @@ -0,0 +1,16 @@ +{ config, ... }: + +{ + age.secrets.nixServe.file = ../../secrets/nix-serve.age; + + services.nix-serve = { + enable = true; + # Only listen locally, as we serve it via ssh + bindAddress = "127.0.0.1"; + port = 5000; + + secretKeyFile = config.age.secrets.nixServe.path; + # Public key: + # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0= + }; +} -- 2.49.0 From 1cf989d7277cb96bec06d5741f848a98ab7e685c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 13:18:01 +0200 Subject: [PATCH 349/472] Remove jungle nix cache from tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 5 ----- 1 file changed, 5 deletions(-) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 63f1f66..8449b37 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -28,11 +28,6 @@ defaultGateway = "10.0.44.1"; }; - nix.settings = { - extra-substituters = [ "https://jungle.bsc.es/cache" ]; - extra-trusted-public-keys = [ 
"jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; - }; - services.prometheus.exporters.node = { enable = true; enabledCollectors = [ "systemd" ]; -- 2.49.0 From 3bf70656dcbcd0d23842efba7b830bddae34d4d4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 13:20:29 +0200 Subject: [PATCH 350/472] Add tent key for nix-serve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- secrets/nix-serve.age | 22 ++++++++++++---------- secrets/secrets.nix | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age index 9ba631a..2d142fb 100644 --- a/secrets/nix-serve.age +++ b/secrets/nix-serve.age @@ -1,11 +1,13 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg PJVi+uAtVYX0QDMUG5/Ip9OrvpUacDhmqWc/n3gLW30 -EkAFsA6KxxO6bAavRSyJ/faFTQnXqcI/+R1ZAujvmGQ --> ssh-ed25519 CAWG4Q 0JqMbIWUuT0kc/5hdUd4i4Qp41W1LpaiLS5Qz3qlVU8 -uTw5Xjr24vZ1uWeMjy/OVF5SR2EsTgREBF0L1sFEjR8 --> ssh-ed25519 xA739A Ssbmf6ra+Ov4YC9L0ygizkmwOg1GLztxfDQtNY/Y4G8 -rbGwoyinV9phgBqaOgMJeqFKyAFB1fb0hLWhf4mviGE --> ssh-ed25519 MSF3dg KcJlZUVuZaIAnnWxuXZmxZZ9v0whgVe9D3lqpR2GmEI -JND6nt2RWGT53gQp/rot2bXOpm3c4n/WPP4l5KJ/wFo ---- /C3CrTiYseZVwp0N4tMSOiAu49Mp/J6yUn3RUpfmqoc -ݙ0c^bX .7bǜ]CfYB;d∭3%?߭r2-՞'z]$[Sf%̜ŏ 0NM_d|bR~puԝ Aji{6ÊW+o \ No newline at end of file +-> ssh-ed25519 HY2yRg T/Qom1qxE0M+FuvsXD/KZ6Usfp6v3Xwx043kDgxbCz4 +6GRg0QjuHd2+d6lJfZqqPMPMjS91HEcJ/W0KRV6Et50 +-> ssh-ed25519 G5LX5w pzg0wK+Q6KZP67CkyZNYbNcahlq9SIuFN18H85ARykU +aDSrO49tg/a3GOAJR96lh803bXoZqp/G6VMiSvf91vw +-> ssh-ed25519 CAWG4Q X+F/6LF8VUUoV72iCLzKKpYGRDoUHuBy1E+yr29RKEo +c779vpt/fiN7n0kGAc5jA9fWkzCPrthlNZdN4p6csrk +-> ssh-ed25519 xA739A sbg087VKj/gcycV9JrBNCoCfB4kRMDSVo3EtfpRVDyg +Lv5ges1KmxGwvz4UPZCD0v4YN2ms2Q3wmrJ14XCKYsQ +-> ssh-ed25519 MSF3dg pCLeyeWYbnNWQwwlGcsKz0KZ4BaaYKCGjo0XOPpo+no +IsNxFoB2nTxyThJxtAxSA6gauXHGQJnVefs/K2MZ+DM +--- 
tgB3F+k1/PQt+r5Cz+FqH31hCZFvr0Y8uZVKkdA80yo +60.(s?68QIdgb`Az Date: Thu, 12 Jun 2025 13:25:43 +0200 Subject: [PATCH 351/472] Enable jungle robot emails for Grafana in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/monitoring.nix | 16 ++++++++++++++++ secrets/jungle-robot-password.age | Bin 587 -> 697 bytes secrets/secrets.nix | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index 1bc0208..e2ffd02 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -6,6 +6,12 @@ ../module/upc-qaire-exporter.nix ]; + age.secrets.grafanaJungleRobotPassword = { + file = ../../secrets/jungle-robot-password.age; + owner = "grafana"; + mode = "400"; + }; + services.grafana = { enable = true; settings = { @@ -16,6 +22,16 @@ http_port = 2342; http_addr = "127.0.0.1"; }; + smtp = { + enabled = true; + from_address = "jungle-robot@bsc.es"; + user = "jungle-robot"; + # Read the password from a file, which is only readable by grafana user + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider + password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}"; + host = "mail.bsc.es:465"; + startTLS_policy = "NoStartTLS"; + }; feature_toggles.publicDashboards = true; "auth.anonymous".enabled = true; log.level = "warn"; diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age index af68a1b2dbc1f8ac9bb13b277a4174375cfdf0ea..a25102e933510c475bfb6ad9eef0c05adad3e44c 100644 GIT binary patch delta 626 zcmX@jvXga!PQ5`%fpe5;rKhp6WkGs?w|j=MhnGciUUr6ej&FWtWTtnJVQ#)*a#?n+ z0heo7Sb>*CUS+v{xTTYsX-K)LrDv9xvuQw*Noin&sgHYJRX}o@enx1q0hg|wLUD11 zZfc5=si~o*g1f0tglV}#WTK0scdDafXV$k#~`HzKfT>Nn&L& zx^>Qu;qE4Z3Yq%J1-{AtiBSa}=81*n#X2z29@QBo?(&Nj#XI&nT1t}d7+U7d6tfTL5WGF>A?|6t`k3shr5Ji zMik~bR~c6MrC0itScH}3`DcguxRj?Q8WegJggOP5TlyC37kYR&a+P~$1xH5uhq`)Y 
z2V@pkgqZpIg&U^kM`mklyE+#Y8)hb1mbykp6gvh+PL5|3ulLF=^D1_&2oDH1Niqux zF*C}oG6^qEt}u5ku5<|qOS3d|v@9sL)Gp5OI0!nFIptNW~adG0@fHa}nd>R#%dvqh3ER7Y^_lb@fW zb@%Q$v`cCQyG>~FtknLd&41qZyqI_5^X4{9wuXzWPurBz)}Nks>xJaDs6*<`09cFL Ad;kCd delta 496 zcmdnVdYWZ|PPkWKrg3FvRbFC-yP=0+WwC{6P>MlbdcM1HK~+&jiBooYd8T%xv$lJ1 zI#-xSdSXUJMW%mNZkl0bx_(NQNn}Y>fqSt@dO(Otv7w23hGB?)WT~%rhki%a#h%|dd`%p6Su-AzpmCZA;#uQy8!D2vST3GoSc4=cAc%gZqd3^z?QHuOlW zN(`{@%rDDMO-ZZB3-PZ^<}x>qtnkS7^f2(PD$x$kFe}LkD|0qC%y!Oq3-R>NPAb<_5ita9-&$SHOV^bIL6%y4lkFif+EOw7&q4fA#` z$}BO9%;%c$rsdB^^emnabr5U}QPG2_k(Nsw`-#+nk n%mx!{UBg4(9X)#=D6i9$X`H6Iu2ypLFVPYu5tr)tQ^qd Date: Thu, 5 Jun 2025 11:11:13 +0200 Subject: [PATCH 352/472] Add GitLab shell runner in tent for PM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + m/tent/gitlab-runner.nix | 41 +++++++++++++++++++ secrets/secrets.nix | 4 ++ secrets/tent-gitlab-runner-pm-shell-token.age | 13 ++++++ 4 files changed, 59 insertions(+) create mode 100644 m/tent/gitlab-runner.nix create mode 100644 secrets/tent-gitlab-runner-pm-shell-token.age diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 8449b37..9a242ac 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -9,6 +9,7 @@ ./monitoring.nix ./nginx.nix ./nix-serve.nix + ./gitlab-runner.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/gitlab-runner.nix b/m/tent/gitlab-runner.nix new file mode 100644 index 0000000..aa1dbd5 --- /dev/null +++ b/m/tent/gitlab-runner.nix @@ -0,0 +1,41 @@ +{ pkgs, lib, config, ... 
}: + +{ + age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age; + + services.gitlab-runner = let sec = config.age.secrets; in { + enable = true; + settings.concurrent = 5; + services = { + # For gitlab.pm.bsc.es + gitlab-pm-shell = { + executor = "shell"; + environmentVariables = { + SHELL = "${pkgs.bash}/bin/bash"; + }; + authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path; + preGetSourcesScript = pkgs.writeScript "setup" '' + echo "This is the preGetSources script running, brace for impact" + env + ''; + }; + }; + }; + + systemd.services.gitlab-runner.serviceConfig = { + DynamicUser = lib.mkForce false; + User = "gitlab-runner"; + Group = "gitlab-runner"; + ExecStart = lib.mkForce + ''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; + }; + + users.users.gitlab-runner = { + uid = config.ids.uids.gitlab-runner; + home = "/var/lib/gitlab-runner"; + description = "Gitlab Runner"; + group = "gitlab-runner"; + createHome = true; + }; + users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; +} diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 450c828..ad1a4d0 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -3,6 +3,7 @@ let adminsKeys = builtins.attrValues keys.admins; hut = [ keys.hosts.hut ] ++ adminsKeys; mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; + tent = [ keys.hosts.tent ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins safe = keys.hostGroup.safe ++ adminsKeys; in @@ -15,6 +16,9 @@ in "jungle-robot-password.age".publicKeys = mon; "ipmi.yml.age".publicKeys = mon; + "tent-gitlab-runner-pm-docker-token.age".publicKeys = tent; + "tent-gitlab-runner-pm-shell-token.age".publicKeys = tent; + "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; } diff --git a/secrets/tent-gitlab-runner-pm-shell-token.age 
b/secrets/tent-gitlab-runner-pm-shell-token.age new file mode 100644 index 0000000..1940789 --- /dev/null +++ b/secrets/tent-gitlab-runner-pm-shell-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 G5LX5w V9bHLoGuY4stRwbzVS9Qa0L9yoY+UoCoXc+dJJQW/Ag +2ut9GfdJ3KBCqZRaloZCQsl8MLfaZAZxqj6JtPJzu2k +-> ssh-ed25519 CAWG4Q OAqnIfMECpKglZ7aF9tv/PQinG1Ou2+IEZ+nf4dtQjg +dANdMLe4iI0d6Xd/dIMpZK+mgw2+VmJFQScHaIxD7WI +-> ssh-ed25519 xA739A nVNF4Y6VSa5PP6FFBJpVmoFYYseoFx5F2wJU+Pwk+Xk +A5CiuTSNlX9Y76qhYgblBdJl3zPhtjWho2oL5/sIKu0 +-> ssh-ed25519 MSF3dg /WMsGnBGzquIMyw06gHKpSS4OUxheulT59kxi+/pxxU +ppwcv7RLzUbQUM7j0Tb9rRVT9XyPMhqYr2fr4S0nTJY +--- zOe0Ko0oxArbmxePMPDVAT0pDju7IeOAih7sNrDcoVs +ikA +hODVw! E݈+`C5LAtM^ E<HI_nno?j- +AnԔί>ZzdTb"(@{_ځC \ No newline at end of file -- 2.49.0 From 7b192929127609920300bdcf900a475014f867aa Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 13:49:51 +0200 Subject: [PATCH 353/472] Add docker GitLab runner for BSC GitLab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/gitlab-runner.nix | 44 ++++++++++++++++++ secrets/secrets.nix | 1 + .../tent-gitlab-runner-bsc-docker-token.age | Bin 0 -> 628 bytes 3 files changed, 45 insertions(+) create mode 100644 secrets/tent-gitlab-runner-bsc-docker-token.age diff --git a/m/tent/gitlab-runner.nix b/m/tent/gitlab-runner.nix index aa1dbd5..447c583 100644 --- a/m/tent/gitlab-runner.nix +++ b/m/tent/gitlab-runner.nix @@ -2,6 +2,7 @@ { age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age; + age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age; services.gitlab-runner = let sec = config.age.secrets; in { enable = true; @@ -19,6 +20,48 @@ env ''; }; + gitlab-bsc-docker = { + # gitlab.bsc.es still uses the old token mechanism + registrationConfigFile = 
sec.tent-gitlab-runner-bsc-docker.path; + tagList = [ "docker" "tent" "nix" ]; + executor = "docker"; + dockerImage = "alpine"; + dockerVolumes = [ + "/nix/store:/nix/store:ro" + "/nix/var/nix/db:/nix/var/nix/db:ro" + "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" + ]; + dockerDisableCache = true; + registrationFlags = [ + # Increase build log length to 64 MiB + "--output-limit 65536" + ]; + preBuildScript = pkgs.writeScript "setup-container" '' + mkdir -p -m 0755 /nix/var/log/nix/drvs + mkdir -p -m 0755 /nix/var/nix/gcroots + mkdir -p -m 0755 /nix/var/nix/profiles + mkdir -p -m 0755 /nix/var/nix/temproots + mkdir -p -m 0755 /nix/var/nix/userpool + mkdir -p -m 1777 /nix/var/nix/gcroots/per-user + mkdir -p -m 1777 /nix/var/nix/profiles/per-user + mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root + mkdir -p -m 0700 "$HOME/.nix-defexpr" + mkdir -p -m 0700 "$HOME/.ssh" + cat >> "$HOME/.ssh/known_hosts" << EOF + bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT + gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 + EOF + . ${pkgs.nix}/etc/profile.d/nix-daemon.sh + # Required to load SSL certificate paths + . 
${pkgs.cacert}/nix-support/setup-hook + ''; + environmentVariables = { + ENV = "/etc/profile"; + USER = "root"; + NIX_REMOTE = "daemon"; + PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; + }; + }; }; }; @@ -35,6 +78,7 @@ home = "/var/lib/gitlab-runner"; description = "Gitlab Runner"; group = "gitlab-runner"; + extraGroups = [ "docker" ]; createHome = true; }; users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; diff --git a/secrets/secrets.nix b/secrets/secrets.nix index ad1a4d0..791a3ac 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -18,6 +18,7 @@ in "tent-gitlab-runner-pm-docker-token.age".publicKeys = tent; "tent-gitlab-runner-pm-shell-token.age".publicKeys = tent; + "tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; diff --git a/secrets/tent-gitlab-runner-bsc-docker-token.age b/secrets/tent-gitlab-runner-bsc-docker-token.age new file mode 100644 index 0000000000000000000000000000000000000000..8c69121a61468287707e6ec89d75db7b18a628c2 GIT binary patch literal 628 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4H}#1yEmugd%y5qg zDR(sXFVpr;F35~X%5e2@^-C_abk;Y~cTUsKaxTp-k2ET)@a0PN^v`lJ%P&v%3Np_O zH#V*EFbp?JweWFDaY=IuNDK2VH!~_NGLEV$EJwG^*)iPRBv2tcGOO65tlZ4VGAA;^ zB+4()J1eEqJkvSdB+c2!KR49GpfWwPEZEW9&6UeI#JMmr!bdy9Gt@uC(J?A2$IH0b z-6`L>+$Z1JB+9YO(=gdD%fmR-uo&I83P*EeOGky&#JsA+D0j2MWOwsQpTh8vqVOEA zl)ONbGK-?(EN%bt0@vig%F;p?Cs(e33LlRWPtR1#(x9-!yx=IusERV9{FKCU6SpWg zk1P|jaL1%Xau%{+1pDt*lZtK9wEEu71p43eS>D+<%ntAdRZ zEA!H_@(g^=!^^pJb#)ageGDCoirmYSGAq2RLQN{eB7?k)JQ7{Q&C)yrs*=s~Jq^r5wGGmP zK(4!|aX~7rd#{7@y_}lZGe2tV89w;x%W0_JHaKoORe!ol#JUXal)_k%t%X^i7GGx# zTUy`DC6M@Rg3+gCQrt|1oRz7Qn?G;b%k8ACxa Date: Thu, 12 Jun 2025 15:24:31 +0200 Subject: [PATCH 354/472] Add public html files to tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/nginx.nix | 6 
++++++ 1 file changed, 6 insertions(+) diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index f67a525..bf73995 100644 --- a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -48,6 +48,12 @@ in proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; } + location ~ ^/~(.+?)(/.*)?$ { + alias /vault/home/$1/public_html$2; + index index.html index.htm; + autoindex on; + absolute_redirect off; + } ''; }; }; -- 2.49.0 From f6ec1293f4bd0c8c168743cc1eda192eacdb2037 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 16:38:40 +0200 Subject: [PATCH 355/472] Use IPv4 for blackbox exporter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/blackbox.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/blackbox.yml b/m/tent/blackbox.yml index d716359..ccd701e 100644 --- a/m/tent/blackbox.yml +++ b/m/tent/blackbox.yml @@ -3,6 +3,7 @@ modules: prober: http timeout: 5s http: + preferred_ip_protocol: "ip4" follow_redirects: true valid_status_codes: [] # Defaults to 2xx method: GET -- 2.49.0 From fc68d1619710d1f86e428e4ee6fc65e51119898b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 12 Jun 2025 16:47:40 +0200 Subject: [PATCH 356/472] Monitor AXLE machine too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index e2ffd02..504cab1 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -166,6 +166,7 @@ "arenys5.ac.upc.edu" "arenys0-2.ac.upc.edu" "epi01.bsc.es" + "axle.bsc.es" ]; }]; relabel_configs = [ -- 2.49.0 From 68ff45075c822292cf6cc9bd6cf30564773ffe1f Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 09:40:17 +0200 Subject: [PATCH 357/472] Add bsc.es to resolve domain 
names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 9a242ac..e680a41 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -26,6 +26,7 @@ # Only BSC DNSs seem to be reachable from the office VLAN nameservers = [ "84.88.52.35" "84.88.52.36" ]; + search = [ "bsc.es" ]; defaultGateway = "10.0.44.1"; }; -- 2.49.0 From 560003d4fdd5b3b9ae36b0c1192202b83e5e86a4 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 11:10:39 +0200 Subject: [PATCH 358/472] Enable gitea in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + m/tent/gitea.nix | 29 +++++++++++++++++++++++++++++ m/tent/nginx.nix | 6 ++++++ 3 files changed, 36 insertions(+) create mode 100644 m/tent/gitea.nix diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index e680a41..043554f 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -10,6 +10,7 @@ ./nginx.nix ./nix-serve.nix ./gitlab-runner.nix + ./gitea.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/gitea.nix b/m/tent/gitea.nix new file mode 100644 index 0000000..031fd71 --- /dev/null +++ b/m/tent/gitea.nix @@ -0,0 +1,29 @@ +{ config, lib, ... 
}: +{ + services.gitea = { + enable = true; + appName = "Gitea in the jungle"; + + settings = { + server = { + ROOT_URL = "https://jungle.bsc.es/git/"; + LOCAL_ROOT_URL = "https://jungle.bsc.es/git/"; + LANDING_PAGE = "explore"; + }; + metrics.ENABLED = true; + service = { + REGISTER_MANUAL_CONFIRM = true; + ENABLE_NOTIFY_MAIL = true; + }; + log.LEVEL = "Warn"; + + mailer = { + ENABLED = true; + FROM = "jungle-robot@bsc.es"; + PROTOCOL = "sendmail"; + SENDMAIL_PATH = "/run/wrappers/bin/sendmail"; + SENDMAIL_ARGS = "--"; + }; + }; + }; +} diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index bf73995..72a28ad 100644 --- a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -34,6 +34,12 @@ in real_ip_recursive on; real_ip_header X-Forwarded-For; + location /git { + rewrite ^/git$ / break; + rewrite ^/git/(.*) /$1 break; + proxy_pass http://127.0.0.1:3000; + proxy_redirect http:// $scheme://; + } location /cache { rewrite ^/cache/(.*) /$1 break; proxy_pass http://127.0.0.1:5000; -- 2.49.0 From 8af1b259f51ae6ddc46436de32b54e808a19d43d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 11:52:10 +0200 Subject: [PATCH 359/472] Enable public-inbox service in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + m/tent/nginx.nix | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 043554f..bf18f91 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -11,6 +11,7 @@ ./nix-serve.nix ./gitlab-runner.nix ./gitea.nix + ../hut/public-inbox.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index 72a28ad..0e6811a 100644 --- a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -45,6 +45,10 @@ in proxy_pass http://127.0.0.1:5000; proxy_redirect http:// $scheme://; } + location /lists { + proxy_pass 
http://127.0.0.1:8081; + proxy_redirect http:// $scheme://; + } location /grafana { proxy_pass http://127.0.0.1:2342; proxy_redirect http:// $scheme://; -- 2.49.0 From 47ad89dee1995b1578821dec7e82368db82f7b98 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 12:53:58 +0200 Subject: [PATCH 360/472] Add p service for pastes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/module/p.nix | 68 ++++++++++++++++++++++++++++++++++++++++ m/tent/configuration.nix | 3 ++ m/tent/nginx.nix | 3 ++ 3 files changed, 74 insertions(+) create mode 100644 m/module/p.nix diff --git a/m/module/p.nix b/m/module/p.nix new file mode 100644 index 0000000..2005eb8 --- /dev/null +++ b/m/module/p.nix @@ -0,0 +1,68 @@ +{ config, lib, pkgs, ... }: + +let + cfg = config.services.p; +in +{ + options = { + services.p = { + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Whether to enable the p service."; + }; + path = lib.mkOption { + type = lib.types.str; + default = "/var/lib/p"; + description = "Where to save the pasted files on disk."; + }; + url = lib.mkOption { + type = lib.types.str; + default = "https://jungle.bsc.es/p"; + description = "URL prefix for the printed file."; + }; + }; + }; + + config = lib.mkIf cfg.enable { + environment.systemPackages = let + p = pkgs.writeShellScriptBin "p" '' + set -e + pastedir="${cfg.path}/$USER" + cd "$pastedir" + + ext="txt" + if [ -n "$1" ]; then + ext="$1" + fi + + out=$(mktemp "XXXXXXXX.$ext") + cat > "$out" + chmod go+r "$out" + echo "${cfg.url}/$USER/$out" + ''; + in [ p ]; + + systemd.services.p = let + # Take only normal users + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + # Create a directory for each user + commands = lib.concatLists (lib.mapAttrsToList (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}" + ]) users); + in 
{ + description = "P service setup"; + requires = [ "network-online.target" ]; + #wants = [ "remote-fs.target" ]; + #after = [ "remote-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + ExecStart = pkgs.writeShellScript "p-init.sh" ('' + + install -d -o root -g root -m 0755 ${cfg.path} + + '' + (lib.concatLines commands)); + }; + }; + }; +} diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index bf18f91..327d4d4 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -12,6 +12,7 @@ ./gitlab-runner.nix ./gitea.nix ../hut/public-inbox.nix + ../module/p.nix ]; # Select the this using the ID to avoid mismatches @@ -32,6 +33,8 @@ defaultGateway = "10.0.44.1"; }; + services.p.enable = true; + services.prometheus.exporters.node = { enable = true; enabledCollectors = [ "systemd" ]; diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index 0e6811a..de9214e 100644 --- a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -64,6 +64,9 @@ in autoindex on; absolute_redirect off; } + location /p/ { + alias /var/lib/p/; + } ''; }; }; -- 2.49.0 From 810a6dfcec3b68f68e82eb464cf7ccc525be99f7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 15:09:54 +0200 Subject: [PATCH 361/472] Move nix-daemon exporter to modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/hut/monitoring.nix | 2 +- m/{hut => module}/nix-daemon-builds.sh | 0 m/{hut => module}/nix-daemon-exporter.nix | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename m/{hut => module}/nix-daemon-builds.sh (100%) rename m/{hut => module}/nix-daemon-exporter.nix (100%) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index db5f49f..0c1cb78 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -6,7 +6,7 @@ ../module/meteocat-exporter.nix ../module/upc-qaire-exporter.nix ./gpfs-probe.nix - ./nix-daemon-exporter.nix + 
../module/nix-daemon-exporter.nix ]; age.secrets.grafanaJungleRobotPassword = { diff --git a/m/hut/nix-daemon-builds.sh b/m/module/nix-daemon-builds.sh similarity index 100% rename from m/hut/nix-daemon-builds.sh rename to m/module/nix-daemon-builds.sh diff --git a/m/hut/nix-daemon-exporter.nix b/m/module/nix-daemon-exporter.nix similarity index 100% rename from m/hut/nix-daemon-exporter.nix rename to m/module/nix-daemon-exporter.nix -- 2.49.0 From 24a0c585924a0387467c8b32f31744e997bda64c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 15:11:24 +0200 Subject: [PATCH 362/472] Monitor nix-daemon in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/monitoring.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index 504cab1..2d1bdc5 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -4,6 +4,7 @@ imports = [ ../module/meteocat-exporter.nix ../module/upc-qaire-exporter.nix + ../module/nix-daemon-exporter.nix ]; age.secrets.grafanaJungleRobotPassword = { @@ -116,6 +117,7 @@ "127.0.0.1:9290" # IPMI exporter for local node "127.0.0.1:9928" # UPC Qaire custom exporter "127.0.0.1:9929" # Meteocat custom exporter + "127.0.0.1:9999" # Nix-daemon custom exporter ]; }]; } -- 2.49.0 From 071a8084a034cfd2e41d75a17d1be65d196bfc0a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 15:52:31 +0200 Subject: [PATCH 363/472] Add GitLab runner with debian docker for PM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/gitlab-runner.nix | 8 ++++++++ secrets/tent-gitlab-runner-pm-docker-token.age | Bin 0 -> 623 bytes 2 files changed, 8 insertions(+) create mode 100644 secrets/tent-gitlab-runner-pm-docker-token.age diff --git a/m/tent/gitlab-runner.nix 
b/m/tent/gitlab-runner.nix index 447c583..166b8ca 100644 --- a/m/tent/gitlab-runner.nix +++ b/m/tent/gitlab-runner.nix @@ -2,6 +2,7 @@ { age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age; + age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age; age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age; services.gitlab-runner = let sec = config.age.secrets; in { @@ -20,6 +21,13 @@ env ''; }; + gitlab-pm-docker = { + authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path; + executor = "docker"; + dockerImage = "debian:stable"; + }; + + # For gitlab.bsc.es gitlab-bsc-docker = { # gitlab.bsc.es still uses the old token mechanism registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path; diff --git a/secrets/tent-gitlab-runner-pm-docker-token.age b/secrets/tent-gitlab-runner-pm-docker-token.age new file mode 100644 index 0000000000000000000000000000000000000000..6ce71b4d295043882819fa1c40a28fbfa3e75665 GIT binary patch literal 623 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4H}#1yEmsINGfhr2 z%FOaFta2%=Fepj%&`-}y$xHQ3)OHRFv@{7!bM{D#3Q8&rcja=fFf7bTH4RJ5POD5Q z^~$$&j`a5RNiRw?sVI!{EDQ-MH!^X}s&ol(_eHnO*)iPRBv7H$q$J6tG@>}fG)g}w z!^}IS+)zI#z`!}htt`u}A~&ldz^}k8BD1pGF`3IT&m_RdGc(YyG^nc3Gt{KYEi)*m z%)BJsL_aSjJ3p)Y&0}cJleacgk@?E_$%gb}Ui$a`?EJAYAiW42H zii4d>ebV#uT?(t5f?T=MLbI~+Jaf_t^j(btUA3Juj4LdS%JUo(v&+nljYCttOPtCA z9Ss8wqLR6Eb#)cW4Ff{`f?Z9E4SjAh+xqgV5$DHktZxpcsw<|ip1Qk?k4Io%iArv-AOFe=tvM~r-3k)T Wl{yvf*+;%ry(4#d{v+uI_4xo7``Ii2 literal 0 HcmV?d00001 -- 2.49.0 From 42d6734da8ea93aefe3944e619ceb668f70cf79c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 16 Jun 2025 15:40:06 +0200 Subject: [PATCH 364/472] Enable msmtp configuration in tent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows gitea to send notifications via email. 
Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 327d4d4..956a191 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -12,6 +12,7 @@ ./gitlab-runner.nix ./gitea.nix ../hut/public-inbox.nix + ../hut/msmtp.nix ../module/p.nix ]; -- 2.49.0 From af540456a626760f2e5381f45448b889d59afd39 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 16 Jun 2025 15:55:53 +0200 Subject: [PATCH 365/472] Disable registration in Gitea MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Get rid of all the spam accounts they are trying to register. Reviewed-by: Aleix Boné Reviewed-by: Aleix Roca Nonell --- m/tent/gitea.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/gitea.nix b/m/tent/gitea.nix index 031fd71..546ac5f 100644 --- a/m/tent/gitea.nix +++ b/m/tent/gitea.nix @@ -12,6 +12,7 @@ }; metrics.ENABLED = true; service = { + DISABLE_REGISTRATION = true; REGISTER_MANUAL_CONFIRM = true; ENABLE_NOTIFY_MAIL = true; }; -- 2.49.0 From d8444131d8aefc2587a2fd9855dc69039fb8a56c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 13 Jun 2025 10:26:59 +0200 Subject: [PATCH 366/472] Enable SSH X11 forwarding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index ea793e7..076cd2f 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -22,6 +22,8 @@ # Use performance for benchmarks powerManagement.cpuFreqGovernor = "performance"; + services.openssh.settings.X11Forwarding = true; + networking = { timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; -- 2.49.0 From 9b0d3fb21e4ad917a10a81dd7e804b0829833f42 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias 
Mallo Date: Fri, 13 Jun 2025 13:14:47 +0200 Subject: [PATCH 367/472] Load amd_uncore module in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needed for L3 events in perf. Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 076cd2f..cc0527e 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -14,7 +14,7 @@ swapDevices = lib.mkForce []; boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; - boot.kernelModules = [ "kvm-amd" ]; + boot.kernelModules = [ "kvm-amd" "amd_uncore" ]; hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; hardware.cpu.intel.updateMicrocode = lib.mkForce false; -- 2.49.0 From 40529fbdcb14faa42292dc0b5b58d7ecd3bf7043 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 17 Jun 2025 14:04:46 +0200 Subject: [PATCH 368/472] Disable NUMA balancing in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: https://www.kernel.org/doc/html/latest/admin-guide/sysctl/kernel.html#numa-balancing Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index cc0527e..a1a33fd 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -22,6 +22,9 @@ # Use performance for benchmarks powerManagement.cpuFreqGovernor = "performance"; + # Disable NUMA balancing + boot.kernel.sysctl."kernel.numa_balancing" = 0; + services.openssh.settings.X11Forwarding = true; networking = { -- 2.49.0 From 479ca1b67138e30d89e17a84cf1e9984663deb2a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 18 Jun 2025 11:07:19 +0200 Subject: [PATCH 369/472] Disable kptr_restrict in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index a1a33fd..3fb9010 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -25,6 +25,9 @@ # Disable NUMA balancing boot.kernel.sysctl."kernel.numa_balancing" = 0; + # Expose kernel addresses + boot.kernel.sysctl."kernel.kptr_restrict" = 0; + services.openssh.settings.X11Forwarding = true; networking = { -- 2.49.0 From 208197f099e9cf62f9b6be731287032a3d8188df Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 18 Jun 2025 16:36:34 +0200 Subject: [PATCH 370/472] Add ac.upc.edu as name search server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows referring to fox.ac.upc.edu directly as fox. Reviewed-by: Aleix Boné --- m/tent/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 956a191..1b7473f 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -30,7 +30,7 @@ # Only BSC DNSs seem to be reachable from the office VLAN nameservers = [ "84.88.52.35" "84.88.52.36" ]; - search = [ "bsc.es" ]; + search = [ "bsc.es" "ac.upc.edu" ]; defaultGateway = "10.0.44.1"; }; -- 2.49.0 From f29461ae3234570b341af1d7ba9b964389836e36 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 17 Jun 2025 14:29:15 +0200 Subject: [PATCH 371/472] Add OpenVPN service to connect to fox BMC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/vpn-dac.nix | 34 +++++++++++ m/module/vpn-dac/ca.crt | 31 ++++++++++ m/module/vpn-dac/client.crt | 100 +++++++++++++++++++++++++++++++++ m/tent/configuration.nix | 1 + secrets/secrets.nix | 2 + secrets/vpn-dac-client-key.age | Bin 0 -> 2246 bytes secrets/vpn-dac-login.age | Bin 0 -> 568 bytes 7 files changed, 168 
insertions(+) create mode 100644 m/module/vpn-dac.nix create mode 100644 m/module/vpn-dac/ca.crt create mode 100644 m/module/vpn-dac/client.crt create mode 100644 secrets/vpn-dac-client-key.age create mode 100644 secrets/vpn-dac-login.age diff --git a/m/module/vpn-dac.nix b/m/module/vpn-dac.nix new file mode 100644 index 0000000..5e8e67a --- /dev/null +++ b/m/module/vpn-dac.nix @@ -0,0 +1,34 @@ +{config, ...}: +{ + age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age; + age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age; + + services.openvpn.servers = { + # systemctl status openvpn-dac.service + dac = { + config = '' + client + dev tun + proto tcp + remote vpn.ac.upc.edu 1194 + remote vpn.ac.upc.edu 80 + resolv-retry infinite + nobind + persist-key + persist-tun + ca ${./vpn-dac/ca.crt} + cert ${./vpn-dac/client.crt} + # Only key needs to be secret + key ${config.age.secrets.vpn-dac-client-key.path} + remote-cert-tls server + comp-lzo + verb 3 + auth-user-pass ${config.age.secrets.vpn-dac-login.path} + reneg-sec 0 + + # Ignore 10.0.0.0 route as is not needed + pull-filter ignore "route 10.0.0.0" + ''; + }; + }; +} diff --git a/m/module/vpn-dac/ca.crt b/m/module/vpn-dac/ca.crt new file mode 100644 index 0000000..af1427e --- /dev/null +++ b/m/module/vpn-dac/ca.crt @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD +VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx +LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk +MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM +Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw +Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE +BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w +KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi +BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB 
+QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu +ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC +F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D +6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF +LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9 +V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx +L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH +LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q +MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL +MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs +b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu +eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE +AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh +Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF +AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1 +6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/ +oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q ++oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3 +4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z +TQQx5NqM7B5jjak5fauR3/oRD8XXsA== +-----END CERTIFICATE----- diff --git a/m/module/vpn-dac/client.crt b/m/module/vpn-dac/client.crt new file mode 100644 index 0000000..aec0d98 --- /dev/null +++ b/m/module/vpn-dac/client.crt @@ -0,0 +1,100 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 2 (0x2) + Signature Algorithm: sha256WithRSAEncryption + Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu + Validity + Not Before: Jan 12 12:45:41 2016 GMT + Not After : Jan 12 12:45:41 2046 GMT + Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, 
CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64: + c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6: + 56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7: + 19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58: + 48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83: + dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9: + 2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa: + 6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5: + b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f: + 60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec: + 76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa: + dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d: + 5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb: + 6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e: + fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00: + 17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93: + 71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55: + f9:41 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Basic Constraints: + CA:FALSE + Netscape Comment: + Easy-RSA Generated Certificate + X509v3 Subject Key Identifier: + 1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18 + X509v3 Authority Key Identifier: + keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA + DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu + serial:91:F5:D7:C3:C0:A6:4E:61 + + X509v3 Extended Key Usage: + TLS Web Client Authentication + X509v3 Key Usage: + Digital Signature + X509v3 Subject Alternative Name: + DNS:client + Signature Algorithm: sha256WithRSAEncryption + 42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea: + 3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7: + 26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0: + 31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1: + 01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a: + 
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4: + 02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3: + 40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3: + 4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9: + 1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33: + 75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72: + 1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33: + 87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62: + a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4: + bf:23:f6:7d +-----BEGIN CERTIFICATE----- +MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx +EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK +EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT +G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN +MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X +DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw +EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk +VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB +cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG +A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S +O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL +Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc +n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc +IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl +60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB +o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu +ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD +GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx +CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl +bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1 
+bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV +BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA +YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud +DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo +ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx +3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk +g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m +lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck +iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU +CzHGPDS3/FoM5L8j9n0= +-----END CERTIFICATE----- diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 1b7473f..57ac6e9 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -14,6 +14,7 @@ ../hut/public-inbox.nix ../hut/msmtp.nix ../module/p.nix + ../module/vpn-dac.nix ]; # Select the this using the ID to avoid mismatches diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 791a3ac..776e73f 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -19,6 +19,8 @@ in "tent-gitlab-runner-pm-docker-token.age".publicKeys = tent; "tent-gitlab-runner-pm-shell-token.age".publicKeys = tent; "tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent; + "vpn-dac-login.age".publicKeys = tent; + "vpn-dac-client-key.age".publicKeys = tent; "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; diff --git a/secrets/vpn-dac-client-key.age b/secrets/vpn-dac-client-key.age new file mode 100644 index 0000000000000000000000000000000000000000..3e92d23553e801a07a342dd94960dc46a4799298 GIT binary patch literal 2246 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4H}#1yEmtt{PBBl3 zN;k;Lb_#NLF*kLo$PDxd3N`WaNGkWw&DYK{^v;hmO$^dbFXt-vk4!goODzp7H?AW z+$qDf%D>7d)gUK4%hcID(kw98BDW&l#K<@@q@2qn#52vzu_z@nIHfXOJ1Z};ETp_R z(xcQUAkoXhFeJ;lD&ITIH!C|R#}(bS3P*EeOGgDi*HU9=^TdKQAIB=+aO63z0x%(c@hD$BGhOx(?!d@PKjQe5-Riy|wH 
z9SdEZf?WL5^u5EqgG{)B^Q!{1lge_7QiI$KDoZl`Q{6+e)3dx2(;~7;vIE@{bMg(! zi_(hM+pA~OBb3XJp%1IvO^ zgG0Ig&Rw>4^P9ZXwmc^-oi9nktL{f|#;sgZ_MZ8#{H>L1U}-^ZvL_kp4Axs`;2{K1T+HX*LgCtYvo z?yUVaQ&Y-!ZKC%4$Ik@y+!o!D+j9D!+&a$I3tou=C)Kvqh3CF0H1epma>@ENd1i~| zo4vm*G}kOxn|J>z=QSJaUF)l^dp%wLiC;}h^_oHM?9WS^kI!S6Kh@eVO}8z%*IXlP zu}Z&_hS5g7^*&6F7vE{W`R4w?bZ=v~#iuoI`F>d}@Tyqs%KCBr%iX%Qk}Kb}=H-a8 z7YF+Oo;&H$hV;EnYTOP9rM9VIY>r35Di=#EIy|3;)5}`=`i`9o|Lr{gW_tUTw!-o? z>(uY)1sBft&S)vhd-b*`bHPOACyH)o9X-o+`o6w;VEEByrOKwKA4@D;_iD~O`eei7 z-O=0LaC&BZTeo1oYeDHF&4bUxN~|S*w_6>*lW|PBuOf4X{N)>6XXV_Oj(PVpw4_ec zTB5aO?hbz4z@$`_bkCl!D}P>Of4tvP9rpU0>Y|tv->02?`FMHpy?&w7&*BbnUS|(= zDo*Av%v~a@yhXUFRG#a_f6r4&$gvYdDU`p=4!nACTm=qQlx{r_ox4| zzf+REDS1QX=QZ+j6Qv5&2!H8j;~cLC6QYh zeS|NpTrPe0a@~=E(qRnlu8QaKHFwFn{G0ce&E(r8xxx zWrxeQcRyBkUzvYhNyBWz+K@>$IpVJ#?Y^^f=Zc^Chm1BT+pui@^!{!@B*&kI#uv_C z)^M&9iBnrvxLA~*JJ92{Plut#wNEB9?0Y8el$ktxN>I$*rnv08ZowU{t-PB28#U&g zX54f9mesv^s!r=|nsek#-g5h#3Tgh*lX&HszK(#!)yv^*w!PP7V;X1Aw^K++=idJ* zG`=%+{hXqR?ap!>8>E%_J%W~R>6yjU;N2d;Bu`M8^BMt<6$j#V(hKXnB70p7HkN zM2Ct0JTyZ(>{M@^xDdPEbc@G9i9Wsifg4u6Pc{^E{a?M~cZAZn3A~MUV)vQf#cJLA z)U~~|zU}6ps~-xkZ4H+_{3ki#T>8HMqVfFf=P{@?%O9ur1L$f$=+h~ zC16dezT%$GpZ)i|m^61nsr%#~%Il8F+ND^>WqaFP}X;vnKVof9N)@dEpJ= zZTD*(&VAu6)iRgM{TbS#FZWoI$x$&~YscnN=8*EO+x{0zls&DV_c)K~PlHm3_M-2S z9m>aIbADd0z7Wy7^SD{Zi#yY9>EE6xSv2kY?K>J8k9)m;)qJk|aKF>+p_#qe?~u@| z-5ymJ63X)}&#a0&kleia+u3P`4zEQFgKw@}TyFY#@&CFbLT{gG+dowHDA0-cy0oZ# Qm0Z=%Gg{``**OBO0Uq7`K>z>% literal 0 HcmV?d00001 diff --git a/secrets/vpn-dac-login.age b/secrets/vpn-dac-login.age new file mode 100644 index 0000000000000000000000000000000000000000..eb0e58beb0658020d16983549202c59de8124389 GIT binary patch literal 568 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4H}#1yEmyEe%&PP* zvM~29uJWl2G6+xi@X9W9(a$Y4GjTRACR&`CQt&Cu1f*cIKj3P*EeOGgDq_r&CgLjRC77yXRf%=~QQ?1&`i z;v{`zBSWvmte`M$|0*ww#H5fgqiimHecxomN<$-U6Q@cuPd~5x#0rm~6xVFas7PNE zue6kuASYw(l1evyi$HYSe1qMLQ_>Zxyz|p?auch3bDW$^lRUgFeJXsDi~P(igF}+M 
z9NmgTQ$h=qeInB=NBOg~+W;WJh}H^FVwl8Vb^Rm?I$b~Db;E@x|Hcqo3L{0H0Ao}MFOrOqrD<6I&? Qb@O@2Uz@R}Ea-|00LLr3z5oCK literal 0 HcmV?d00001 -- 2.49.0 From 22a2e1b9e811aff55df6f20aed40adfb585ec5fd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 17 Jun 2025 16:41:25 +0200 Subject: [PATCH 372/472] Monitor fox via VPN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/tent/monitoring.nix | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index 2d1bdc5..0180d1c 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -200,6 +200,17 @@ module = [ "raccoon" ]; }; } + { + job_name = "ipmi-fox"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9290" ]; } + ]; + params = { + target = [ "fox-ipmi.ac.upc.edu" ]; + module = [ "fox" ]; + }; + } ]; }; } -- 2.49.0 From 7a52e1907c214cc1dc2bc70d8a0afda307c039c9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Jun 2025 14:47:55 +0200 Subject: [PATCH 373/472] Restrict DAC VPN to fox-ipmi machine only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/vpn-dac.nix | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/m/module/vpn-dac.nix b/m/module/vpn-dac.nix index 5e8e67a..e677c73 100644 --- a/m/module/vpn-dac.nix +++ b/m/module/vpn-dac.nix @@ -26,8 +26,9 @@ auth-user-pass ${config.age.secrets.vpn-dac-login.path} reneg-sec 0 - # Ignore 10.0.0.0 route as is not needed - pull-filter ignore "route 10.0.0.0" + # Only route fox-ipmi + pull-filter ignore "route " + route 147.83.35.27 255.255.255.255 ''; }; }; -- 2.49.0 From 815810830e96f580b4a5572b6e68b214d1164c4e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Jun 2025 16:06:50 +0200 Subject: [PATCH 374/472] Monitor Fox BMC with ICMP probes too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/tent/monitoring.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix index 0180d1c..c241806 100644 --- a/m/tent/monitoring.nix +++ b/m/tent/monitoring.nix @@ -165,6 +165,7 @@ "anella-bsc.cesca.cat" "upc-anella.cesca.cat" "fox.ac.upc.edu" + "fox-ipmi.ac.upc.edu" "arenys5.ac.upc.edu" "arenys0-2.ac.upc.edu" "epi01.bsc.es" -- 2.49.0 From adeaa0484d055b24089d55403e72364a5bf18f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Tue, 1 Jul 2025 14:59:39 +0200 Subject: [PATCH 375/472] Add all terminfo files in environment Fixes problems with the kitty terminal when opening vim or kakoune. Reviewed-by: Rodrigo Arias Mallo --- m/common/base/env.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/common/base/env.nix b/m/common/base/env.nix index 98943ed..d8e417b 100644 --- a/m/common/base/env.nix +++ b/m/common/base/env.nix @@ -21,6 +21,8 @@ } ]; + environment.enableAllTerminfo = true; + environment.variables = { EDITOR = "vim"; VISUAL = "vim"; -- 2.49.0 From db0f3fed911f5d1c040baebdcc6312f4c86018eb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 2 Jul 2025 15:20:05 +0200 Subject: [PATCH 376/472] Revert "Only allow Vincent to access fox for now" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e9e3704b677baed1649583f25e4e1bc050a9534e. 
Reviewed-by: Aleix Boné --- m/common/base/users.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 7e9817a..9bce966 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -69,7 +69,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "hut" "tent" "raccoon" ]; + hosts = [ "hut" "tent" "raccoon" "fox" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -82,7 +82,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" ]; + hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" "fox" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" @@ -121,7 +121,7 @@ home = "/home/Computational/dalvare1"; description = "David Álvarez"; group = "Computational"; - hosts = [ "hut" "tent" ]; + hosts = [ "hut" "tent" "fox" ]; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" -- 2.49.0 From ee92934c741ec1e8df6b0eaf24ec746d83e50af8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 2 Jul 2025 15:20:51 +0200 Subject: [PATCH 377/472] Add access to fox for rpenacob user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 9bce966..3a1c276 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -56,7 +56,7 @@ home = "/home/Computational/rpenacob"; description = "Raúl Peñacoba"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "tent" ]; + hosts = [ "owl1" "owl2" "hut" "tent" "fox" ]; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" -- 2.49.0 From 80cee2dbd0498a9365b6c2356052aac3d42e5924 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 3 Jul 2025 10:26:44 +0200 Subject: [PATCH 378/472] Add pmartin1 user with access to fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 3a1c276..92e7b13 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -140,6 +140,20 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" ]; }; + + pmartin1 = { + # Arbitrary UID but large so it doesn't collide with other users on ssfhead. + uid = 9652; + isNormalUser = true; + home = "/home/Computational/pmartin1"; + description = "Pedro J. 
Martinez-Ferrer"; + group = "Computational"; + hosts = [ "fox" ]; + hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl."; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a" + ]; + }; }; groups = { -- 2.49.0 From 25e9c071b088a04e962cb2a2efede456d0aaf2ca Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 9 Jul 2025 11:02:11 +0200 Subject: [PATCH 379/472] Add new configuration for apex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 1 + keys.nix | 4 +- m/apex/configuration.nix | 58 +++++++++++++++++ m/apex/nfs.nix | 37 +++++++++++ m/common/base/env.nix | 2 +- m/common/base/net.nix | 2 +- m/common/ssf.nix | 1 + m/common/ssf/hosts.nix | 23 +++++++ m/common/ssf/net.nix | 59 ------------------ m/map.nix | 2 +- secrets/ceph-user.age | Bin 1023 -> 1133 bytes secrets/gitea-runner-token.age | 20 +++--- secrets/gitlab-bsc-docker-token.age | Bin 629 -> 629 bytes secrets/gitlab-runner-docker-token.age | Bin 626 -> 626 bytes secrets/gitlab-runner-shell-token.age | Bin 626 -> 626 bytes secrets/ipmi.yml.age | Bin 1563 -> 1563 bytes secrets/jungle-robot-password.age | 24 +++---- secrets/munge-key.age | Bin 2006 -> 2116 bytes secrets/nix-serve.age | 25 ++++---- .../tent-gitlab-runner-bsc-docker-token.age | Bin 628 -> 628 bytes .../tent-gitlab-runner-pm-docker-token.age | Bin 623 -> 623 bytes secrets/tent-gitlab-runner-pm-shell-token.age | 23 ++++--- secrets/vpn-dac-client-key.age | Bin 2246 -> 2246 bytes secrets/vpn-dac-login.age | Bin 568 -> 568 bytes 24 files changed, 172 insertions(+), 109 deletions(-) create mode 100644 m/apex/configuration.nix create mode 100644 m/apex/nfs.nix create mode 100644 m/common/ssf/hosts.nix diff --git a/flake.nix b/flake.nix index 67ae087..b8352a9 100644 --- a/flake.nix +++ b/flake.nix @@ 
-27,6 +27,7 @@ in lake2 = mkConf "lake2"; raccoon = mkConf "raccoon"; fox = mkConf "fox"; + apex = mkConf "apex"; }; packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { diff --git a/keys.nix b/keys.nix index ad8e304..6971267 100644 --- a/keys.nix +++ b/keys.nix @@ -11,6 +11,7 @@ rec { lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; + apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; }; hostGroup = with hosts; rec { @@ -19,8 +20,9 @@ rec { playground = [ eudy koro ]; storage = [ bay lake2 ]; monitor = [ hut ]; + login = [ apex ]; - system = storage ++ monitor; + system = storage ++ monitor ++ login; safe = system ++ compute; all = safe ++ playground; }; diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix new file mode 100644 index 0000000..2facf6c --- /dev/null +++ b/m/apex/configuration.nix @@ -0,0 +1,58 @@ +{ lib, config, pkgs, ... 
}: + +{ + imports = [ + ../common/xeon.nix + ../common/ssf/hosts.nix + ../module/ceph.nix + ./nfs.nix + ]; + + # Don't install grub MBR for now + boot.loader.grub.device = "nodev"; + + boot.initrd.kernelModules = [ + "megaraid_sas" # For HW RAID + ]; + + fileSystems."/home" = { + device = "/dev/disk/by-label/home"; + fsType = "ext4"; + }; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + networking = { + hostName = "apex"; + defaultGateway = "84.88.53.233"; + nameservers = [ "8.8.8.8" ]; + + # Public facing interface + interfaces.eno1.ipv4.addresses = [ { + address = "84.88.53.236"; + prefixLength = 29; + } ]; + + # Internal LAN to our Ethernet switch + interfaces.eno2.ipv4.addresses = [ { + address = "10.0.40.30"; + prefixLength = 24; + } ]; + + # Infiniband over Omnipath switch (disconnected for now) + # interfaces.ibp5s0 = {}; + + nat = { + enable = true; + internalInterfaces = [ "eno2" ]; + externalInterface = "eno1"; + }; + }; + + # Use tent for cache + nix.settings = { + extra-substituters = [ "https://jungle.bsc.es/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + }; +} diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix new file mode 100644 index 0000000..e245549 --- /dev/null +++ b/m/apex/nfs.nix @@ -0,0 +1,37 @@ +{ ... 
}: + +{ + services.nfs.server = { + enable = true; + lockdPort = 4001; + mountdPort = 4002; + statdPort = 4000; + exports = '' + /home 10.0.40.0/24(rw,sync,no_subtree_check,root_squash) + ''; + }; + networking.firewall = { + # Check with `rpcinfo -p` + extraCommands = '' + # Accept NFS traffic from compute nodes but not from the outside + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept + # Same but UDP + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept + ''; + # Flush all rules and chains on stop so it won't break on start + extraStopCommands = '' + iptables -F + iptables -X + ''; + }; +} diff --git a/m/common/base/env.nix b/m/common/base/env.nix index d8e417b..e974a6c 100644 --- a/m/common/base/env.nix +++ b/m/common/base/env.nix @@ -4,7 +4,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu config.boot.kernelPackages.perf ldns + ncdu config.boot.kernelPackages.perf ldns pv # From bsckgs overlay osumb ]; diff --git a/m/common/base/net.nix b/m/common/base/net.nix index e49d204..9fb1599 100644 --- 
a/m/common/base/net.nix +++ b/m/common/base/net.nix @@ -11,7 +11,7 @@ }; hosts = { - "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; + "84.88.53.236" = [ "apex" "ssfhead.bsc.es" "ssfhead" ]; "84.88.51.152" = [ "raccoon" ]; "84.88.51.142" = [ "raccoon-ipmi" ]; }; diff --git a/m/common/ssf.nix b/m/common/ssf.nix index a01839b..4638c54 100644 --- a/m/common/ssf.nix +++ b/m/common/ssf.nix @@ -4,6 +4,7 @@ ./xeon.nix ./ssf/fs.nix ./ssf/net.nix + ./ssf/hosts.nix ./ssf/ssh.nix ]; } diff --git a/m/common/ssf/hosts.nix b/m/common/ssf/hosts.nix new file mode 100644 index 0000000..039b039 --- /dev/null +++ b/m/common/ssf/hosts.nix @@ -0,0 +1,23 @@ +{ pkgs, ... }: + +{ + networking.hosts = { + # Login + "10.0.40.30" = [ "apex" ]; + + # Storage + "10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ]; + "10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ]; + "10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ]; + + # Xeon compute + "10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ]; + "10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ]; + "10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ]; + #"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ]; + "10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ]; + "10.0.40.6" = [ "xeon06" ]; "10.0.42.6" = [ "xeon06-ib" ]; "10.0.40.106" = [ "xeon06-ipmi" ]; + "10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ]; + "10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ]; + }; +} diff --git a/m/common/ssf/net.nix b/m/common/ssf/net.nix index dfd85f8..e09ba75 100644 --- a/m/common/ssf/net.nix +++ b/m/common/ssf/net.nix @@ -27,64 +27,5 @@ iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 
60000:61000 -j nixos-fw-accept ''; }; - - extraHosts = '' - 10.0.40.30 ssfhead - - # Node Entry for node: mds01 (ID=72) - 10.0.40.40 bay mds01 mds01-eth0 - 10.0.42.40 bay-ib mds01-ib0 - 10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi - - # Node Entry for node: oss01 (ID=73) - 10.0.40.41 oss01 oss01-eth0 - 10.0.42.41 oss01-ib0 - 10.0.40.142 oss01-ipmi0 oss01-ipmi - - # Node Entry for node: oss02 (ID=74) - 10.0.40.42 lake2 oss02 oss02-eth0 - 10.0.42.42 lake2-ib oss02-ib0 - 10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi - - # Node Entry for node: xeon01 (ID=15) - 10.0.40.1 owl1 xeon01 xeon01-eth0 - 10.0.42.1 owl1-ib xeon01-ib0 - 10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi - - # Node Entry for node: xeon02 (ID=16) - 10.0.40.2 owl2 xeon02 xeon02-eth0 - 10.0.42.2 owl2-ib xeon02-ib0 - 10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi - - # Node Entry for node: xeon03 (ID=17) - 10.0.40.3 xeon03 xeon03-eth0 - 10.0.42.3 xeon03-ib0 - 10.0.40.103 xeon03-ipmi0 xeon03-ipmi - - # Node Entry for node: xeon04 (ID=18) - 10.0.40.4 xeon04 xeon04-eth0 - 10.0.42.4 xeon04-ib0 - 10.0.40.104 xeon04-ipmi0 xeon04-ipmi - - # Node Entry for node: xeon05 (ID=19) - 10.0.40.5 koro xeon05 xeon05-eth0 - 10.0.42.5 koro-ib xeon05-ib0 - 10.0.40.105 koro-ipmi xeon05-ipmi0 - - # Node Entry for node: xeon06 (ID=20) - 10.0.40.6 xeon06 xeon06-eth0 - 10.0.42.6 xeon06-ib0 - 10.0.40.106 xeon06-ipmi0 xeon06-ipmi - - # Node Entry for node: xeon07 (ID=21) - 10.0.40.7 hut xeon07 xeon07-eth0 - 10.0.42.7 hut-ib xeon07-ib0 - 10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi - - # Node Entry for node: xeon08 (ID=22) - 10.0.40.8 eudy xeon08 xeon08-eth0 - 10.0.42.8 eudy-ib xeon08-ib0 - 10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi - ''; }; } diff --git a/m/map.nix b/m/map.nix index 606d417..fc6125c 100644 --- a/m/map.nix +++ b/m/map.nix @@ -6,7 +6,7 @@ switch-opa = { pos=41; size=1; }; # SSF login - ssfhead = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="operations@bsc.es"; }; + apex = { pos=39; size=2; 
label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; }; # Storage bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; }; diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age index 1ca264b08b349dc20b50b7b679d1818d1eebaf59..951722d4439da7d74124c3d7b57c5e9d18960aab 100644 GIT binary patch literal 1133 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4jI^loE?00VOEU2B zH?|DUu=EP73MjD%DF|~aj?DMg4+;v242f{5@T&+)FDo;2EatNCF)__`PI9VpbPm%u z%r!NPN-s-G%T6}(_6QHFEHFt*%rHzeFfS?eibS`qINdQZ*HNJ;B*jzPARxaqD9t>_ z#MP)IDa@~^%t+fjD9OmuB-z9xCBPyoGSbbs(u6C&*wN1<$1JSS-P9%7FfY?QxS+(t zyV4~*(J{xv)Wjpy+$6cYw9MNn*#O-(k4U4+pmYU;ESEg@!a~`>} z9KYmjr@X)nmqb_ZH2-W@3;(p7pg=Ac@5EfwihRrTDEIUL1MgB#e;<>=ta76i%lsn! zRI?(V9Mi%I_uz^wvru%~(#nk@GSU@NBfN@BqQVT5{E|W|%_9nf(n^g(gOV$?E7Er#!T@ASMj5B?6Bf_#>++ETmihYfY1C1TsQ}n&E(<~zO^D`nn4SanI zO)Jv$EiKS(D@iWR4G2`Ihzd5%*RJ&ScJ*=$EHN_5c1!hgH%{003e9zL%+1L&sLF|Q z^vTRga}DKkbYfF)oeBH8=9}O%BTUtTL@Mb1p5(F!e>Z z&CAQsCp=vt!`V6CEGRI?)TunG+#@Q>DK96rFwChm%RSJ;ETG)PysFAP-P|KLGdrKF zNIx(v*(k}e(mTnd*xS#gFgYS5FE7K(z&oVatvIkat-#IL-8iS%y*wS=HfP6hcauPc zaBa8n;&iXz!s29ONB3-FSHIFEw-P4{vj{`ClrR^k{HUrjpVUahV#jo@T#pLF;9ye^ z?*jciN5^6>v%*s2(9rS__hfyO#C+d;=g=t6Bu5v=%nDC*+bSH*jV&D&j9dcrwKK{B z%u~#RT)dOYvt2#P)5F4pJd2XO%nc*;vyEK*OSAIbq9RPVOp9H!42lz7eGR>WJbXff zj66(=O0^3EjItw~ECLGr@=7vY%*!L4vMVvtq;IgBaZ0*EnSM#3yK6*ZnYO9BS%pPp zL13tJKvk%pV?ddEsZ+YSQF)?epoeK$sF4Ymc37&BVRmY%t82bzSw@&?iBnaoe^9Pj zh`w`PvO!LnN1%UXYKF0Ps&6`%uCA_vPf>D}XJlDmK~cC@prK2taYcHdvzvE#wq-Gtl0>d_sNTbT2bOk?`0@Je4)CgnKB9}083m3P*Ov|+5 zWOKuUK)*0oi^xF3;1cHo?Z~W!>VsyWiB$ws}1S+H$RE32(S(KQETY85k7WfzE6&IBiIA)pqr2AS{ z1sha4mstb{=D8W9TW}>h1|)_Dg}6Go2BzmaX9u~PS^AZRN9pIdmL-~(d1(7sWTpiM zyBb6pdZOFr7VKBQxzE$T;*lz=#iR~RAy*D2N;fQXV zvtzisNuWZRdq%mJi;tJ5W2l8wL716)fQ3t5l~F-ih@)?5TBJ#tNnTk|k&(Y;WjR-d zUsRNvOJt~TK%k$msdiyB(H~$>~*PUYUi( zew9%nVcCX8Q3h_Vd4*0UKIv(Jh80=ff#ne?CHaOPSs3x=8|-GBlCF>+8fod7!)Z?rmz8 zuWwkWpKMfGR21kG;8>V#5>Vn8;>g8Z_j5zGuKxar@|#a(PlsL8kT`KGYH~`dR 
znAHn^e0^y=XG_qg#F$FA@Q1vd2c}B~Zr7Gz-^e&WDC=qKbP?~({E2=tvXZF)O5#jk diff --git a/secrets/gitea-runner-token.age b/secrets/gitea-runner-token.age index b3a1294..a074de3 100644 --- a/secrets/gitea-runner-token.age +++ b/secrets/gitea-runner-token.age @@ -1,11 +1,11 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg WUMWvyagPalsy7u1RaEFAwJvFowso1/quNBo+nAkxhQ -OHcebB7koPKhy58A6qngEVNWckkWChyEK3dwgy8EL5o --> ssh-ed25519 CAWG4Q Yx/HLIryUNE2BaqTl84FrNRy4XLCY2TRkRgbA9k3qU4 -LZljfuLS5yMVVK6N57iC6cKEaFP6Hh2OkvWJjuFg8q0 --> ssh-ed25519 xA739A DOXjPRttSWz51Sr7KfjgKfAtaIYMo3foB1Ywqw9HYDY -CA5puXK/1HDOitA2XHBI3OdKmZ7BzHst4DyuWGMC6hE --> ssh-ed25519 MSF3dg +2LetdIiIZUk7wtHNS1tYsLo4ypwqZ9gpg77RQrnzHU -yIUu8BVbF3dhUx3531RR50/cJQd9gd8VfKUQzEeT/iQ ---- oY/wQ+RjZO2CmKZtbQ0yOVZ5fv2+AlvvkRu1UDfCNAA -_8`G=C7@x &\Ft)cPe%ֽ[zX-0[ɲtz;%~H0؃*XD; \ No newline at end of file +-> ssh-ed25519 HY2yRg d7+nvfAcdC3GjJxipXFrsfGGyP5jAY+gRWRV+4FVYAM +CG7r0bRGgnUWcdfDnpe7HwZ3L/y7b5iuJuqvf15b3/Y +-> ssh-ed25519 CAWG4Q X0vITOErz4wkR3VQYOcVlnrkHtwe+ytdZz1Hcrs4vVs +6IWYOhXLQ+BnML9YfLLHJYEO2CZ/uEc9IBqhoWvjDHI +-> ssh-ed25519 xA739A p5e/0AJtZ0+zbRvkB/usLuxusY8xXRx9Ksi/LQlcIHw +M4S/qlzT9POyJx4gY9lmycstUcdwG2cinN4OlV22zzo +-> ssh-ed25519 MSF3dg Ydl7uBWzBx6sAaxbzC3x8qiaU3ysGqV4rUFLpHCEV30 +/1AUHBhCNOs9i7LJbmzwQDHsu+ybzYf6+coztKk5E3U +--- kYt15WxClpT7PXD1oFe9GqJU+OswjH7y9wIc8/GzZ7M +hߓ`V4F_k)^m$uj:ѳ}Z]$U]u 0v8?XPg%d#d9{rAi \ No newline at end of file diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age index 4650f21d7a6f53ae51f58f9e79794e70519a631a..985097b038e77c9122ea636bcbdafaa1ac7dad1d 100644 GIT binary patch delta 558 zcmey$@|9(RPQ6ie`m4#)IX2~Vx`e{XxZr*`aK{=L2+PNVSQRTi|nPrYn zZk|b1r6vV|MODU?1xB8kl_3#sZn;VNiQ)d*zKKrdRTfUgiAl+m;~B;4Jv_?uB8*b1 zl1u`^Qp&wUgZ%R?QUimHGAqN9O#_`nJtGqH{F5@w4c$VyQhW^xgB%SlTvGgu1KnJU zOjAo7vr3%(@{B@_QgX8klR~pAvt0{JQhn07baizVDzq~zGE>4+v&!6k!~Lqv3ae6c z+ybh!EsMPjO+(D{^V2HojomH%4Wpu5xhC^{2y@HOwRJlA*m5SL=7;=$t~Tq}7it9D 
zl3OHlUqk;`Bu`;+=&ofEi&CFF>71a+TY2wty;Y~N-7jC6xevQ9pW54ce6y(K|EpQ~ zZ@-x`@vq)cr~i!q;@&2u%Jlc8!aHU5fACW_Pq^)T?Ch^P#s9sJtZg*@nvnPq|giT?R{J{G0{1^UJ2hC#_5so{}aMV@AP zVV>bBmfD#~WxfGs0lC@UxfNmMzNLvy`q_qt?#Zc+IiUfeZULT?;~B;4EiI~COx*$t zD%?G*inTrcjdLws6AL2!&60~slGDR0^Gw5YqavJ&lbr*(e9B#2JaP^5B1}tz{Vfc0 zqAEiJ&HT+XBJzx!Ov?*Q!jm%m)0{(mL$fTnbaizVf^*#>Epnm)!%D-8i_87YO-qb@ zbJE?7QXI`P4I=!qy-l<0EhEDGN<)kDxmLx$RC(xn`%UW;^Yr74GWy$2^h7P&t&noQ zgyob*`EuJ+Y~RAG9FIR`=wz-kU$ASwk3sw7SO*@(8SgG`+qx=iS=hlXhmzlaxZ`zo zOWVAM1(OVP9e+zDaNPUwXussEiNYS=JsCWfpHS8@6Wr5S;ai~_{6p!&<6_2-0C!5u Aod5s; diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age index 3a33b335d80e157c2268ccc9c667ef2d40c8c47f..c481b59b138c092201c280c42e93d28a25bf46de 100644 GIT binary patch delta 554 zcmeyw@`+`FPQ7uymw%G6qf@F+N{DH?pI=0wuc1YeZ?KzFWI%dhYL!7&qDOvNQI)o< z1y^!$Vs2ilqhm^@V~%OLc92PCuA5OxpsE( z#E;_PLB1g=ewD_7W%`9ihDnua22~lZ$;n=c1vzOY9_juS=0;{lWhwq9Mitpy1ttMm z5k*GD0ludBe(sgp1yNq1RmP{jD}9Q&baizVa?@R+97{9Pa^1X>3(UeyqD-p% zQ=L3=GR#BFTmws-%`Kg3i_$_|3(Aesxn35^ojY3jTd}>q=v`oF;H?u@a%VXj69eC| zto11m`{CYuT_Ng~9{>IN>D`CYe*V9f?5KB(-{soou(jGPS8UE6t=scZW}H*7s>6Kc5rLD^;@flt{0W|dkhxno)K&U0EG+5oB#j- delta 554 zcmeyw@`+`FPQ974sdJgRr%#GgQGsu^b4ZAOW?pezgKyYUsRf>kwr;SPLXA4rEx{Lep*$5BbTn7LUD11 zZfc5=si~o*g0o||yGfuzg?^T{v6+cyWN1)&iFsOvr(1wiRbo<7xUXxvfkj?~g>O)i zm#@BIpjl8Tm!pqqXo*pgOObJCL~>w+ZA1@5v4_CW?8=B#YP^f#vXx_;~B;4-P{eF1O2ls z(}HsHQo~IHi^4N2O|y%G3Nn*J%RLGsECYQDqQWy=Jafyre2fh849lW|QVWWM0zynG ze9c1q%>t4=^?ki6%1v^ed_vs}Tm#B1Lo(N?5LjuKN_@wn|fV v=FG|7K5HJ&Y%kbjWw&l$_iHs4>3N#mYXh%}8}@kIX%5>Td~f1nvAhoe+A7AB diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age index d35f5340cb0496bce82e3e40ba7d155c6f04c52c..8ecc7e23301a3be68696fcb1dfd8212123fed120 100644 GIT binary patch delta 554 zcmeyw@`+`FPQA9Hlc`UIbD5#Og}G5qMPRN$ez=>rcY3&Au4i$&f1+uXU#YXRe_mpG zF;|IcVwzF0N2p6sp?;)`r)OS%o=ItlsYP<4X;ffXs#jUAS)^rL95v16fSRYXQ|Mo>^$fLpkMdzgt|qJe4P 
z#E;_PQK=Q_Nfzl&6#=P6CYc#ohRLOd;g$x8e#z-YjuAeNf!;xu0Zxw279pNonZ90L zWkC`7q1xu9;pM&_$*Gnh#V(m?MN#_UIms@;{wW!e&e;_fxuwaI;~B;4vy67baizVJSyBWB2ALha)U}e!%ag|J$y_u zLekSSvYa9;%e=CZ(#n%+GlI;@eL@{0xpq8Y_}I~WxsB~ztE&119u0=Hsf$>TrLmfS zTKC%{d-dOHNj7yPJU`o ziJ5+gzp-I9SDs~Bc^Wg~6d^Wp4RgrA~=O zJ`pKSmPJ0Ep2^x3>8Sy3k?zH=MaeJ=@;~B;4t0Hs!{Iz{@ z^UCuwi_JsbEj@G0A}vfk{QO;f3XMYY(z87BOe#YROtTHR!kwyIjMGcpvb8Oe3?scw zGYy@BA`JC&{8F=%T{1F^N+WYad>t(clF}WybaizVD#I#ri~`CG^v(3molVn%T-{Rw zL(6lEB8q*>B0}7JLjqlEy)3+4!;C!3xxT$xs&Zaxf;RulN0pb}A2=TuYF}x5t|vB0 z=(&-pi~TMEp9e2z^*8g%2s)jVHQA@e5#l!GvB13)o&4El)|I^1w5 diff --git a/secrets/ipmi.yml.age b/secrets/ipmi.yml.age index 02d1218774bb6ebfcbdcf14b93c3f410e9efa2b4..f98c6e47e87972e1cfb66537ae953e105843c3e1 100644 GIT binary patch delta 1480 zcmbQuGn;3EPJM2nV?ji!Pk2(Mt9gK5gn^S!QB<~3P*I7SUx`bYqnBl=YerzIfmeQZ zB$s1ekwIZrWo~eOuy;Ual}nM6wqc&9mseS_v6FLVs!?QAepq5!qGhS2Czr0BLUD11 zZfc5=si~o*g1f0tglV}#c!i^TkW*P&K~ku`k8^wCKhH{zEvfW89~{l-s$>9`ks#AW|h8~`hH%yzHXkA;~B-nGtxctDnlZ| z!%f}X&GU`2s+>by^dl0rLo!Q(wDnyB(@er#L$X~wDk93cv@3kQGu%y_U2@&>-9syV zatcG7A}d|oOS20D{Y!Hz!jg(hBaJcwv&?)apJf!UuguPJ^p8qT4)@RxO?F94G7R=| zE~<3%bPd(_@HH?ku#C)1F-t8BGIe$23QjdQNJ)1p^ezuHO|1;k&M&UW$V>^!DD=sz zG)XS^t8@>nFvu_tc5)Bp($&>f2&@PUC`|SVa5P9UP07hg%kjv|OZ0U2Pjt(6Ep{`E z@bomVkIGJ}^iItx=L&!ISf}h%!rz9{>2E@xpK+`7_`S!;D1}97-@*$Yv>Y4niSGSx zE#!E#GOc48Z}L*n?W^l{=&0V?naV3!=hZR)^qtTyA}uUx;LA{u`0{5NV!x0&gqG*TDd{Y_sw)IUQDUuXMD!u^^fav z$@0DCzLq<;$9>J3uI81ffAsAB@2&6m*N8>hR>=e^_8Fh{Ed?Fi|F)w?Qn31yz#Yd+|LW2#HOq@=U}kSGTw2m zW&Xdf)mldsmQ8GP3s2XGxj&V`&^vG?-#TuFm@cM=>{+)G-)~sQ|5mi(XuV~e)Vm$O z&2)A?W#`%Am74n303&RCR?)GJVzB6_jy|&J*|M4y8 z+Qq_xYiv)#THl9$+A`y$nvJJp)n`xp`VHKt9yq<~JoMvL1LwXYbEN`qY3;oFr`lZH zkL9lJy9M+DQsi zyh3hkoQm?~&X05zn0zT#}KMlT~m@6^m?dLmBF3e5e+>~DW zor7aqef;V2iH`r}oFk`n-aq&E&8iE&J04zUcVPW@zEXlSYH1#KY*~kpcSj}D&(|08 z66fBn5$>54F;}Mg`K`J3+v@!GXU1OH!Ib)NTWqs7ugucV@e-X(k@sc9j$HrtTs`6D zdy}aBQ?(@*na$OGw?g-ErBmcVNqGT9d##=gH*4aUi!17Vh2ws%NhmqlBzX3;gjEf{bOjZnUot5$FXQtythjRzwRgccS 
zFke18bhX3u9pT*DCM=kgwXFEp>aG23rtP;F<#{x|e5)@yvuyLzJ659nLdrWWxc?|k z=wMiUt3_kJ*7@3i3HFC?PhY2d%`IHp!CPyJhDwmuik25#PX)`5yw3Ucp+D|dzu?_< gn*8Of7PWfFNE$9|YTe`|(wOZfu#tII^if|&0OQ9(h5b9!oJRh36xD3`9CLUD11 zZfc5=si~o*g1f0tglV~gpGRJWi+53}zqx*qn{#${rkg=VN=l@wL7;_cXsB~&cz$G| zcad9_pLwP)mup&Krh#QhxL;J2WvOdHVtS5+g-bz^W1>lcVZKpFhPQ!7T3B{edWuW( z#E;_PLBXl{zD5CN!Jhgl!3X2yn*h2Gjl!A2&9A!crs0l}_Z$-#x5 zuC93zrmp!RzJWPWN!cz*u9?L?+CfP!2KlC$g~@3Z=4qvwety1_;~B-n9g_>p%`CFg zwNvuEt5PCD0-Ph0N^?>R(n=lO@-2K*^euccQY}mM3!Hqpio^X4%uGU~viuW63XM~J z0=*(5Og$YV&5R1u13V29gCh&WBC2u&N<#A|pJf!U4=Hy_w)D%a(aGONgP4J$KFPcF{NDKIPY$_gy-^mOy_%n0@j zNlLVEi!=?-DvGkmb#@Bn($&>fNcS>NGdA%EPV{jrHAr-}F!QJ?a!Dx+$?xBUN9-FJKU z*5fkY_&YB7e7q*vIeYCnc9y;!S`o1>{cGZm_h#%`ACzYoDEuZP-!p1{LD!sB=c4o` z@McGEOHW!j=T4(a%`|7T*rS-miW2x2Jg*DLHq6=X#aca=I0CdjtJe#>}>j# zlkw!P?^pGfuQmLI%=fqP)tQ`gEcVyFJoSse=q%l>T%LhyqEWvS#TK55KgfLk_Zj9N zn%mNqTUn19raXVMpRZ$emR6(B`t+)Zwi=atcYE%AG5cjT|8wDGW?on2`a0JJK}{b0 zh`_I(XUnEt3}1ir^@n@L%AI$n96FQTuyv*$hnn}x%ceemN*eVlYK6SFNpClad)%G2 zg|`Wr+Du8+W6(W9i!3mU+*O9GLMN$cP&53sD1Cx^`dggV{EG)^xe3im{fM< zgQe~-R=1V6Ty|!Ahp8vMZ+UUiW}a1F>`{)}`&V68h~l<8zh_P)`aXArY~^L zjI9@wy?=d{$qeqS#+m$AzS-XM-T2C+Y*KFJv%}XcZEvwDWiwyW+4{$R zoDF}T&T-lJ+QlbLpz~*D4STYmg21^csTNB(0;HBJZtS@u=owcqOSny=nCq0w`8li(tpBs!aAkJ znfq>TWfweFJ@eGv%5|T^7iXV5@Qp*&eTz}q((mq0#x8}S{JnA~bKO!_T5G?(b2COk z$stufLi?#}-EZd^-*z{zi8ZaAT6*Z-t zF)B5`WTk5UxX3=A@f#20Jngvs>c7_))OqCX&&sx1_n*=GOm3^dWB(Wr*`w1Wr^>Of d-DTRXyI%JBd>`*6yL=U_xA;B2`zUydKLAbiffE1# diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age index a25102e..1ebca6b 100644 --- a/secrets/jungle-robot-password.age +++ b/secrets/jungle-robot-password.age @@ -1,13 +1,13 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg 0tpCZ5yI339pgPKGh3HJ8cnkhKlMoyYiKR1mo1cvkm0 -EVVpJ8nyw/W9B65Tw59IjJC5Pb4uQX5LGnzPcf/hUs0 --> ssh-ed25519 G5LX5w YaDAKeAAunommW6q6+hTjrjaadmB17OG89t1Dx/T5z4 -tJXdciiBTz9V+0nf1sGAk4vSlOgfeEgrKr+oDJ/4ays --> ssh-ed25519 CAWG4Q i/cpMcOaZpH7aqwsR/fZiVL9CreL9dkk5F5S9dXrQBY 
-uU8G51pMH00ywaIVY+AzjpiqzanUYpn9ANRabugSXbE --> ssh-ed25519 xA739A DTiXqnCz1zNgyLt8VvnOkVLDwfa0qJpUBQw9Ms/qHHA -wKjSYYOUEJkPisxT6MNW1eoYk++ECrs1ib9uEYXsAQY --> ssh-ed25519 MSF3dg JmvJsExWPW4b6RT62mz4Wscx7EsyDPVf91A9ps9+shM -67jZYnxJpQAhnRWnTOXs+Cu445dRCpDzIGGp1xYuF3s ---- QmdvzR7QqRPxS1fHc8rR/PDZxN8u+BVKAVvE8cMLhqc -EG Q ssh-ed25519 HY2yRg rsbyYULV9S/kz4OzBLQIVfyotgKrzPzvjPNVw69coTo +i9fgGAYTPxJ4Ulft3xzwNPF8v85Ae9ePMNWp593vSfA +-> ssh-ed25519 G5LX5w mhB3iiqV2e+tT31FCREX2Bqq2F2g+vTYvjCuyGSeJxs +Ep9zZykCGFW841S2mnllEi0oPnRiRuYIGtv6ckp+IBg +-> ssh-ed25519 CAWG4Q M0AJEZuiC6FnRy8rAJQ9T9dCXfIfLXGk0uBGhYOxRSg +5jSRNTi0c6we/oLBdUy1am5saH/5Nh1fmVqYajXFbGc +-> ssh-ed25519 xA739A Zf9tUKg4S4UuWMGEtAWVg0pa6vTzKIl2Ty39IjEG2mE +RCSkVFyO2ZuDlAHung9bTeM91aTXxNRJ779kE0C6pK4 +-> ssh-ed25519 MSF3dg QLiG9s3mgfO6HnQ8/ReizkGllsjYebIL5ZthSVcD7Ao +YdzcodBarrdg6R96Ys01aEPoeYygbT56yz90BMFfr0U +--- fS/rGOP3IGG8b3bCDy26nBL0P1rtqC70CmKOGDsg8Tw +;YM_Zꙺ:]Ez89ze DX9{x^ Ll 㦑9RVhWs \ No newline at end of file diff --git a/secrets/munge-key.age b/secrets/munge-key.age index fe9f9773f091c2f664d751c57f4929e9310864cb..9da37fa5e597fb193f38336cb893e66f1b2c13cc 100644 GIT binary patch delta 2037 zcmcb{e?(w{PJKjqqJgJVdT@DYjzONMt7~P2hiS1}L1d&;np;YiuaQ$xj(dhlKt{Q{ z1(%b4vUjGoOGHpfnp<&}kC}y`r>A>?YpH2svY&gfie?W<)_%zN@}pnu|qtnUS+)iCa#RdA3KHX@IkZQL(#mhO2*w zhmWC)xo4mwS4feCen_ZAdTF+qYe`r{WrabxMTovnWu-@vOGII1a9X&jOO{uXx3R11 z#E;_j7M77^PDRPazM+LVPC1U+Q5gZL#i`*XZk}FcW{#CEo`p$XhDE_Cp?SVsRr+QY zE|n(PenyE-IVR4Drr}w}Mj>8F=A|j_mJudJ2Ek>yh8{+qfl(&tK1nM#ipWS;$PLT# zO?FIj&nQjF(Kqo+c6AHzu_&}Ca8Gj0(RcMs_YDg6Gjk6!F1M&M;0iDea!>UrPAxJn z$q6d-3wQK$kMt@sEhzWSN%hW4EOyk+OtC1d3iLBJnHNwTaZPzvx|{yieYGS zd09bthG)2|znNEQWROL!BUfdlt3_FAnSW?@Vw$$QYe0FfnX5~QipP<(}^2pOWvDT$*c?6qyyO9h93?q-|jwo}N_Z?q^YyWae2M?3-U1;*}JMZk=zi zn{i6ILZM|&MxIwyh<;$PXGKm_NlH$RafFLsSX7XCgt=#ySF%}tfVY`JNv=~Mmq(y} ziib-^y;oYQM`1>RK|!*9ptirUueM`;vZH@xL9m}?k#|~3ia}XLIhU@ku0mm9X1{QO1?fo>`x)1Evuvj3l#d-`+n z*$X0)f%AG_r_}Sm{n|Y(-tAcL#chVa3-_L}j^s`_ebW5n3OzSDjgwQu71^0y&gU?g 
zX%QB8s_^*JbIP%AZu)a}Z}W+BZfMrqu;t~>es?vAW~TWc5^ZgZr(Xa4R3qba0duec z<6>>54GvOIc~*vs7ZkBea(yV7@Sv;D z@b9AK?^XNPFAAOe?_aG`uE2tqt+P#?u0J#@+Op8>@g|=$Le<|F{V46-fBu!FeV&q8 zZkCzX+V(4kZ?4Nd+MSWknV52QRqwjJpS)GWHOgP59(d`LvQ*!-5 z$Imvmx6N|7#kkt=R7cXAdt47|cuytGw_a3OY2!TY^KNDi>$O`d`=6tvmD?d3A#YfEGQIkSB5)Dr&FxhJ{Sfa~*eMZYNrzgK1>i5q-*l3vbEx2{z-hFTWIVQx!86@8;pB5qfI_0F4p zr)8k1$-G8flCP?+iPp`C|`5K7a43`%*eW0uulJ-MU_A Vw!iM=%ULs?7FdRV_|esR0RWlCSQP*O delta 1945 zcmX>iaE*V0PJLjcNq&eyhIY7imSs^!x@(rViCdvzK~QpJQdF>cNs3QIph2!rR#r}E zI+sa6s#|EePg-PlR&ZvZWtN-2K}C9*U%GE%URGeSTV$kJiFTn`q=}<;AeXM4LUD11 zZfc5=si~o*LUFodVy>ftPnJt)s!O74goOHzopzj3ltSZ--hU~YI>YGy%JS(sOmPgF`#R%voTM!spKUxr`) z#E;_P$q{*BVO1WEK|vOQkrl3$=|$$1rg_B$Q3cM%6_w_NK6%DL6{S)Bl|i9gNj}*{ zk>*u7rjaHFnL*jUrsbv;zRsSx&iWPJ7EwuQ9zmWFc_Dd$!A1s?&oYYFhZq#+ScVi= zdWEL>I+a8iaY}n1|+MdzE^-`4(3Mn0XtgmSm)J<%DY&=adBdR+?rwxtgXK z=IA@QdZ!hY7^USp2bol6x+Pawx+aDOc~?cEhmx0ae-rwQK(t2SA|!oCs&bWN_MenUS&qMM@68cX>gK%pl4c1 zXm&tKp`S}+goR;MrKP#KyGdrHE4p>gj^XYmfeNKTq5hu1ks(ImiJ_4xzPZUEiJ2a5 zrtX$zrXkKIsg}+W^@XNxNf`znM)_QxIgw^zmClX@ropDJseX>R+P>~4j#(agZkGDq zzJ*zViTWw#MXv5%MZW0PRXCa(TRJKj6k3Lr8m5_;<~tV^8&*08SteonMzrujM> zCzm-UmTUW0l?JC)y1H`dhvb_V<$2c|6?+AE7dpFy7`T{*8T#kDc)M7H2Do~-nB_;g z7#O7#m}g@oIp1J6Bup@UkRrZEaH{-*O|foTSJw{XC~~uA6x` zUrg}Z*zW(`M#AaQ6~pYV7YA(k(i!fD_7qew{8%GtQR%sQvu5aY{oJ(q6{3y33Kv9E zE=_KntS3>ww)N~{-iAlntVgmsC&sUeJk0;^<6EJf%emG>id^0KIXpjxb>AM_2~XGO z9zH94!kg*V-MFv`T~|!b%U)Z|y>Uuco#}IyfN8Fp;b&sZ-@N&_ck5Ocvl;s?nj~`H z2u!z9E9xp-eZS~klGdgR>@R1Ei}&gF(TF8;GOzx0v! 
zO%lBSRq7g#{ezjS=N@4Z+PO@3-~C9vUjlifRk*z0)oWk) zR~C`iYSz*8R`bd2)0}&Q8w|I&h`op@I8Z+0L3^yWL6_hZ=~O;#!DU&Bj+YOY&kMVL zpD(fVtk|+M0*<%tX}GSIsb`&UDxh4m_4e|dp0LBeRxs{3`Kd!L`EK|e#|70DrM?Te zwyv)5v#R>RY{Gxwi0S%*?;m+NLUy0+x@A%S?yjiZKfBQ8fA&9Y-$ie6g3Y zvEk2kvC};ZPQJgh)p!3|nVWBHW7bZ;z5WZ=qqS2fUCDiXcD>>em9Tc1=c1RA5*5?u za2R}c`~0w;Vfq>ddxh|ikM_U($EeiGyM)!hMr-zt*5r=R_w_7gulhr6`i71YwoYHN#dA0K zoDTT4Q1mVTRnwklt5sb@OEosD->+L@AoXa<-#nkkRpIHat0GjV>Q_zBi7cwz@_mtI zR^u%V_Pl+`8_k(Z!wx-6$|*k4#krt!u8>Xq>K$J=Ty&0C8@tG+?cf)5NmEy5x*aVm zw_=9$p?MQmwm*JZ%x9lduuX8c*Q?&(+pgi>e^P7Z&n#wrVlk~mbne!IClS8Q+;6{n zukrfoXOg;3b7#OVF;i(*VFuogxA9RSHG=;P-w7^EcAxFCT-K?2e!s(me{rkS_cuB$ dv_InV=EM2e0vq2sJek@7$F6R4u#0W01puXN9|r&c diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age index 2d142fb..f366897 100644 --- a/secrets/nix-serve.age +++ b/secrets/nix-serve.age @@ -1,13 +1,14 @@ age-encryption.org/v1 --> ssh-ed25519 HY2yRg T/Qom1qxE0M+FuvsXD/KZ6Usfp6v3Xwx043kDgxbCz4 -6GRg0QjuHd2+d6lJfZqqPMPMjS91HEcJ/W0KRV6Et50 --> ssh-ed25519 G5LX5w pzg0wK+Q6KZP67CkyZNYbNcahlq9SIuFN18H85ARykU -aDSrO49tg/a3GOAJR96lh803bXoZqp/G6VMiSvf91vw --> ssh-ed25519 CAWG4Q X+F/6LF8VUUoV72iCLzKKpYGRDoUHuBy1E+yr29RKEo -c779vpt/fiN7n0kGAc5jA9fWkzCPrthlNZdN4p6csrk --> ssh-ed25519 xA739A sbg087VKj/gcycV9JrBNCoCfB4kRMDSVo3EtfpRVDyg -Lv5ges1KmxGwvz4UPZCD0v4YN2ms2Q3wmrJ14XCKYsQ --> ssh-ed25519 MSF3dg pCLeyeWYbnNWQwwlGcsKz0KZ4BaaYKCGjo0XOPpo+no -IsNxFoB2nTxyThJxtAxSA6gauXHGQJnVefs/K2MZ+DM ---- tgB3F+k1/PQt+r5Cz+FqH31hCZFvr0Y8uZVKkdA80yo -60.(s?68QIdgb`Az ssh-ed25519 HY2yRg tdVrzL3EryCEDJSiAjHfr3AC6rhyKLLe9ZaKKa/fyEk +kIbJjp/odUkQ9E2fXpk4zratLieyMNdNLHYGQt8+860 +-> ssh-ed25519 G5LX5w A0wBDwowrQyByfinVVrypH5VyvyKk3O3O8+2JnVgcCI +kLiXfQkC+8QycLyyM/6dAKEE6SGxSZJS7PuOTQr10XE +-> ssh-ed25519 CAWG4Q HkbFgDtrbuv+KCwULZppiy88ZHl3kHcdlTVTOfMKTzM +KMGdQl8Gl51gUp1bxEa41a0VBBiHWD81/9C75NX/pzA +-> ssh-ed25519 xA739A XfYFE5jPFvcoTMXtwJgs3+HPLQxRmvz1W7yqE7jSYGE +497iDMqiIx1u+cBu8KZDNF2SPpGCrVqjGdUPD8kEjE4 +-> ssh-ed25519 MSF3dg 
Vbxxsmfoywpi4W9WUMzgay3Nd1UBigliYHD7Wew9AHM +aLt5GN8jJWbbrHfs1321tQz44lBaATe0BipT/EGc80I +--- JHESkz0eGNPo3ZEGALVH4xsQ4p1O/6ShlfOw58fjH1k + +AwNgCԢְ7 ǟ4#0ss-*$Z[*ia{?=v-E70]q0)q"K{BZs*l9-E+8<(a*$dNxd \ No newline at end of file diff --git a/secrets/tent-gitlab-runner-bsc-docker-token.age b/secrets/tent-gitlab-runner-bsc-docker-token.age index 8c69121a61468287707e6ec89d75db7b18a628c2..c105a3a34a03b7aab0c76055b28bae063478e772 100644 GIT binary patch delta 556 zcmeyu@`YuBPQ6D?ph=iTj#G+pX0Bc zVMtnTcy^T|mvfMbW1v}DVWL-9q+xDKsAq&>dR1anc~G%oaj1oxMTSRJNnW;}MVX($ z#E;_P6_tsmj;1AM#{T6M+F?cR>7i+1`Z=Bf7D@UQVTQ@6MZN{4QBFRIE@8f0<=O>~ zrWVGPp?PkFMvi6fPVQCVNr9g3fv%C3;f3J_&cQx`mSx((?gq(|;~B;46C<*HDx51L zQ~b-r@(VmXatk6{Jd~O+5V#yj@c=f{ly0!o4H1LyD`S{LIX>1EW%c zUA#COWH delta 556 zcmeyu@`YuBPJMc1hI>RvxudavnYMRwL1si!hO3XOUvi5mo0Tn(T zC7zzCmZd>qiFv_Mj!_k5M)@g;kvQ7#sf;~B;4{mndb3@Uxi z1FPKq+%25ToeYwq3M&fJ(yM}v5-ao4vhoak%{+|5Qk;^x0!=Lgjf|2)3tgRDJ1FDkE^F0mf%|o>f(t}(qxODGnT#!oZ-s|9eFQ+E<%#Rv-h7Z2_avJKl z4UXGR)t{~su`WY9r7%`xYhl)>#n)NGmex0O2_!z7VDxF36gN{LXJzW-=FgkudeN>!$>kDI%GrL%j0Pk3;4W~HmPWlmwG zb6Bd0rD0w=S7NePzJ6YYccF1Xa6rDBYe1r#kDp;$m4`=?YgKWHU%pRCenz;Xhj*s$ z#E%kTsqS9+KB>W;f#p7-Mv>{-?wLkOnb{s*#vaAVK53QthPlOsKE__<9_3smA?1k` zK~CNwIr_o*m67^Bc@;tWS^B2oWzK#PreUd3KHixIh8B^&MUj)^8O7_vD|5|5E7J@k z$~+7`%yRu)T!XUxE%FPpeKY+Gs;VM#jRLYF(n?(Y!;85>%PpeH%e_6FOhXIP!%Fjm zeO!$K6SMqWyrQx_%=IIU{faBK!`*VsBZ|3nb#)aiO)}iWk|Hym-J?=`UAyuTW|n_pl}llTK}n*AetKp~UaD`RwsTmZrAc6#vqxf7P*P#I zE0=SHVPQ_HX;@-*T4hS9SH7ilq_?k6dQqZDMPZa@VMtK9k%?P)?b7NxF%CUP^X;SVdGwnNMLwaG_86 z#E%kT?v_Srj)`75VU8Xd76IO&k>;l1d7kjyc-d29=(nW}%bg8O7^e0}cJleacgk z@?E_$%gb}Ui$a`?EJAYAiW42Hii4d>ebV#uT?(t5f?T=MLbI~+Jaf_t^j(btUA3Ju zj4LdS%JUo(v&+nljYCttOPtCA9Ss8wqLR6Eb#)cW4Ff{`f?Z9E4SjT-SffXV&v@ zZa%Nyb7aCr-_xnmdZB+$w{L2jm>fQ{Md`q$iI0E2UEBKdsuAbMZLDt&rm8EZub#TQ rjE_fPUx`X?uOI))3#~aV%iRhR&6PS8?%7AaRlOs3dHy5m2KD&>*Qv&O diff --git a/secrets/tent-gitlab-runner-pm-shell-token.age b/secrets/tent-gitlab-runner-pm-shell-token.age index 1940789..2d957a7 100644 --- 
a/secrets/tent-gitlab-runner-pm-shell-token.age +++ b/secrets/tent-gitlab-runner-pm-shell-token.age @@ -1,13 +1,12 @@ age-encryption.org/v1 --> ssh-ed25519 G5LX5w V9bHLoGuY4stRwbzVS9Qa0L9yoY+UoCoXc+dJJQW/Ag -2ut9GfdJ3KBCqZRaloZCQsl8MLfaZAZxqj6JtPJzu2k --> ssh-ed25519 CAWG4Q OAqnIfMECpKglZ7aF9tv/PQinG1Ou2+IEZ+nf4dtQjg -dANdMLe4iI0d6Xd/dIMpZK+mgw2+VmJFQScHaIxD7WI --> ssh-ed25519 xA739A nVNF4Y6VSa5PP6FFBJpVmoFYYseoFx5F2wJU+Pwk+Xk -A5CiuTSNlX9Y76qhYgblBdJl3zPhtjWho2oL5/sIKu0 --> ssh-ed25519 MSF3dg /WMsGnBGzquIMyw06gHKpSS4OUxheulT59kxi+/pxxU -ppwcv7RLzUbQUM7j0Tb9rRVT9XyPMhqYr2fr4S0nTJY ---- zOe0Ko0oxArbmxePMPDVAT0pDju7IeOAih7sNrDcoVs -ikA -hODVw! E݈+`C5LAtM^ E<HI_nno?j- -AnԔί>ZzdTb"(@{_ځC \ No newline at end of file +-> ssh-ed25519 G5LX5w 5K0mzfJGvAB2LGmoQ9ZLbWooVEX6F4+fQdo1JUoB3FM +AKGa507bUrYjXFaMQ1MXTDBFYsdS6zbs+flmxYN0UNo +-> ssh-ed25519 CAWG4Q 8KzLc949on8iN1pK8q11OpCIeO71t6b0zxCLHhcQ6ns +uy7z6RdIuoUes+Uap3k5eoFFuu/DcSrEBwq4V4C/ygc +-> ssh-ed25519 xA739A SLx5cKo0fdAHj+cLpJ4FYTWTUTyDsCqKQOufDu3xnGo +VnS/WsiSaf6RpXuhgfij4pYu4p9hlJl1oXrfYY9rKlQ +-> ssh-ed25519 MSF3dg c5ZXvdNxNfZU3HeWsttuhy+UC5JxWN/IFuCuCGbksn4 +vcKlIirf+VvERX71YpmwW6zp6ClhlG2PR4R8LIN7cQo +--- pJKICDaYAlxqNnvHIuzB3Yk7tv0ZNYflGTQD+Zk/8+4 +h/\JJ +0? 
p@܉73za',kaIXXOZI\ BP/cUɿ~BS' Qfer^8lVE \ No newline at end of file diff --git a/secrets/vpn-dac-client-key.age b/secrets/vpn-dac-client-key.age index 3e92d23553e801a07a342dd94960dc46a4799298..4ed52512f11a537b76b59a154feac1887cbd7c04 100644 GIT binary patch delta 2187 zcmX>mcua7DPJL01QDJh1WpYMom_cBrc3zsXo2P5Op<}VPk7-13Re^p+q-mzUhoMJ= zBUeOmP*8AUc5abNdZvZBc3yZ`zI$Y1NuIfhL3nOOSYlN{Qb=*Rqic|RD3`9CLUD11 zZfc5=si~o*g0o||yGfuzc2%&Wain`hwnb88s&kleqMM0hR#;H5c4%peiMMg4n{SSv zX-0Obe`ug5S5i<(Vp2*-U}<5lX>eMZZ-`@2agnLFcdCDSaY3iX$?7 zvK;d(ax6+SlFR*aEK&+9-P|e+!@QF#yi9!jN>j_soDD3va!X6nJga=elCrZ4qe=|J zJuSimJk$I`!jhv5or--6(?ZNWe4{+fOhc2obaizVf(tAwJ^T&Q_0z)Ae0-CtjMBnO z14=7%6a77NN{fm!3nMG*^L#_{-Lfq`xfJVfoa$H9@2Gs_CvwSUhM#!O`u%L%LoY@ab8fpG@YJ`j_`6mz zSNc!Q>7$O;r#FA%NU4oDdSxfKN7SwQ%X1IxG2=bpTVZ^0{XNn9^LyUzbXmH^%<^Z`p zerMa-FgdfU{}*q(bkVYo^>eKBifXK|l+m-pb$d9+ z`85X$kFtF}9GtDMrOr{KQz{i+{M+rUQ4ULD;)+l&pO&iqE>;zj3WM4#M5D}>O})#| z)|eDx>B;$dN%3ksWwSNm#yLr%zgWdm7V1R&%diMnx_^}SV_n%#Tm8-a-#zTFx4$*6 zHJg-~l0!T0#KMxu zS2LK_E;MNRygHqGO4vJbEB_J)6HQK8$AhcaX#H4z{dd}u^DS~xy!G2{Yp<%f#rjOV zbolFn&nX8|vz9G5EM_W|R}els(a_$*ZY`gP>Y>THeFpjyTuLiX&YfhVK37^+#^y0^ zQ-s>Kn|`;fELX{F3${`9tSeprZ~l(Gyu4z`d!%aF#hcdl_)8i-5Rq)R-*r>LPpI^) z*mBd`>u!ow2g_cNjA3pLs=rqM&8PgVzqFf)LoG}C{w2YmQ$ur4@PAo3A?8uky#~Jn zA0NA(NYF5ST)(<_$F|9tHXlEIUwN`CL;h*OhwZob)Spwh(C@WrU-}lViNAfBr*C}p zg_p1T*rCYt9({U=y|%LtE0_Ii%*o^ql-2$el3*CU!gxmYImaG@jeq{Iy#2PD_fq}V z)$_NA@-O-PBy7HC>Ersvu(ko zkN4$5?b`K965SOOf-M%VnV>hztBSwI{#R;VeKA+v`ENSG*E>t8-pn(f$^Sp^kCPR$uN+_5#rbU&D_bwXz&ZZ9-f z7u%(@^c$N`#MZmBcP!lAQDqZXm$*WZJ-{|Vbnl}s&wz`uGebA%6zn>0{Gs6yD@S&+YWua7BB>5fml$1j_p$hu!1}PpPjwD;}*lb?=h1kia5= z85t99KH%E*k7@1!w+mr`Q~muu%dS?Qs8wY%;@deqm10%rE;QZ0>0{S-S(VH; zv4IIX7oL9L-oGS!p2NBJLodod|CzDmG2f{GpA$=txp~A)*=CuNUNuG4>6YuNJGLHy z7v+6-@^qM+@e0p9GIi&SOK&$fG+bC>x~;u+R`!huu7W@OtjG8x-*#;Xm?{$|Q(q=2 zmcua7DPQ8hDig`*@xTSWvKqiK#_sUZtODU{!i%QLwjhu&cg#D3`9CLUD11 zZfc5=si~o*g0o||yGfveiK%NvMTB!`h<=!5Zn#s1sautQl~1ZcPI#87vwNgjV6a7Q 
zMY@TRab!q2mr00cnweu!N@Q?KWx94&USwHFd2ysisZl_pmxWdqEcM*&5I%{jU5YJoq}Ba)AYT=y@O1+g7d2aw3Etmi&BH!3@S@9 z{ZrjTv(vM@6VoEHO0omp6Lazn%8SyA3X&qZbaizV^1VF+Dl9CME4`Bn{hUJ6^Md>{ z%Di*k!g9m>svbf>MJ+x&F>wws!NIywtWlCoP>XNy4k{M{vfiTvGO) z`MGz$@t%f*H%*&veb-gxcHYBv^^EA`-}(v%C+_hOjlNeu)4x1b$#So-xYW|~pQ9MF zoAy}8yEExTM7g*mygL2qN5swu@x@QI6Al!FM#a_Z+?&)Rd1b$})z5-g9QDQOAJ5L; z$EX_jfuZiXm4t)*!HlIgA+F9RU2o{_to=1pQ_6R3qW1j9&jj_{7TuBCa{8a#I?mP$ zUWo!H)wb1z=e{X4@~E|P$@(>UW{c*Vy}v9p*DP3@cmFEqH5=<)>#MGNJzf5ZUrkE& znnCXD&r6$+&tsTB)!Hvjw=KEXTqA68y-L56hS5g7^*&6F7vE{W`R4w?bZ=v~#iuoI z`F>d}@Tyqs%KCBr%iX%Qk}Kb}=H-a87YF+Oo;&H$hV;EnYTOP9rM9VIY>r35Di=#E zIy|3;)5}`=`i`9o|Lr{gW_tUTw!-o?>(uY)1sBft&S)vhd-b*`bHPOACyH)o9X-o+ z`o7k`dSLj`W~Iueryol!T=#0uJo;qA&1W1Q%cFwo%bgS zOFIN|FhsPl8J$|yF(>xJ?C@@b{oCgD&-pj+;UDh2=^tLq5S_hpom6?%a&hKs^5Lsu z*1pIn5`51Tx%#HJu33us2WO{4k1h+%z9GQBr7E#FZxx4Ke)~-B8-Eynlvu~O8S0k) z$hYFJ|0VXq=+i!-dP|eJDHpQePZBWQaLYhjZ^`BbT#o;~{r+{p!}sr|us`-Qf~Mr} z>9P47*`I45uDjygk^S-}Yh0UBq=UNmr~k6QQRLLoJ|2ij4pD8uXbI$jUuT?B1ky{ykgfFaIE`9fM-I2VRotiW1Rb_%oZXTZB z(ciGmkMC;Kzdx_8A3k_l_^)CF&-{)^-u;Z4@~@V!Ra0!3!xj1_XfuZ*tNgz!x@x^4 z3MYEjSei=o2+x|HBj%@d=!8m8cZY}NXRFVf_Q%ayTRU_5^k8@1hX*$QzW3)zgQMDJ zqds@#p1oq#chqwVa!#Qorx~`=*^PUpZt`9Yj`|&iZ*e zWC!!JbJe2h<%JuXGy8>E%_J%W~R z>6yj?Se^-^`7qACq|_6J*dgvV)G?nO{%`)p3k5C_q>=icS5QA z+796ubM(uj%!2)#WdrJv_4}^|ycMHm-T$4dHF~YaPyg;VsoNm&^Sb+M+M_ zSdz(6F0vHaNye1{Sp_B;VHSzziJ5**KB37ad49%8?yiws#%Z2r z!IfcIVHp`lz80m)`VnEqNjU{6fxZF0k(ow?Rf!&c5oHwyNoBs1;~B;44V;Sg6T=Pi z{5*q$wJnW3OUzC4%`&{B!V}GdwSC-7liW-l3yQsr(gH2GD$Vl^ydzyaTvPK5jq_4G zE7J9IBQtZ1Q++Ecs>&kWj0(aFGklY@i=0BabaizVJPY&9P2Ekj4U8f!^O6ny{j-x@ zE3`8MbG-B1Bb`&T9V^V8%}RniOpVJOxdiTUJ&t(sS2c}ie}mB1j0o;&55qtg?mwmpND&HshMAyS&o~ZXJ(#xvbUeMrBiB{yLX_Ic5a%Xt7)<8 z#E;_Pj_!%c5rzICX)gL1xtaOd#@P``&c#Xk#zux-iCIBm+Wu8u7KuqAVMf_p`ue`f zhLwg!+9pnwW}bdt`H2-CK`E};mQj(uCSGYNDM3!g+9j24`WAtc;~B;4tGx5ma&i-^ zd~=+fOp`pkEqy9{lZ*V!EQ3Rmyd2$%LsLQvlYJu7D~+?c@=Uxl^0JCbEcAU#EW*4( z+yX-rgWUW*TtZ9A3NrM~vI29>N~_9^1Kk3-baizV^aIQc$_#?b-1Cwgt3m@E!y`=! 
z!c+3iqmuojLb4*W6DxzA3ro|yeT>sRx#~ Date: Wed, 9 Jul 2025 11:11:22 +0200 Subject: [PATCH 380/472] Add storcli utility to apex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 4 ++++ m/common/base/nix.nix | 2 ++ 2 files changed, 6 insertions(+) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 2facf6c..a36a965 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -15,6 +15,10 @@ "megaraid_sas" # For HW RAID ]; + environment.systemPackages = with pkgs; [ + storcli # To manage HW RAID + ]; + fileSystems."/home" = { device = "/dev/disk/by-label/home"; fsType = "ext4"; diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix index 5eee5b7..0e41b27 100644 --- a/m/common/base/nix.nix +++ b/m/common/base/nix.nix @@ -6,6 +6,8 @@ (import ../../../pkgs/overlay.nix) ]; + nixpkgs.config.allowUnfree = true; + nix = { nixPath = [ "nixpkgs=${nixpkgs}" -- 2.49.0 From 1e3b85067db16b8d77e649b143b70ca3f5ff0533 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 9 Jul 2025 11:24:22 +0200 Subject: [PATCH 381/472] Remove proxy configuration from environment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All machines have now direct connection with the outside world. 
Reviewed-by: Aleix Boné --- m/common/ssf/net.nix | 8 -------- 1 file changed, 8 deletions(-) diff --git a/m/common/ssf/net.nix b/m/common/ssf/net.nix index e09ba75..911e180 100644 --- a/m/common/ssf/net.nix +++ b/m/common/ssf/net.nix @@ -9,14 +9,6 @@ defaultGateway = "10.0.40.30"; nameservers = ["8.8.8.8"]; - proxy = { - default = "http://hut:23080/"; - noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut"; - # Don't set all_proxy as go complains and breaks the gitlab runner, see: - # https://github.com/golang/go/issues/16715 - allProxy = null; - }; - firewall = { extraCommands = '' # Prevent ssfhead from contacting our slurmd daemon -- 2.49.0 From 66001f76f7840f6382d35ff6b0784c00306df668 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 9 Jul 2025 11:26:22 +0200 Subject: [PATCH 382/472] Remove proxy from hut HTTP probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/blackbox.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index e69acd3..96d72ae 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -3,8 +3,6 @@ modules: prober: http timeout: 5s http: - proxy_url: "http://127.0.0.1:23080" - skip_resolve_phase_with_proxy: true follow_redirects: true valid_status_codes: [] # Defaults to 2xx method: GET -- 2.49.0 From 0d291d715c35b13c22d28de4d6a54468cbfc8a8a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 9 Jul 2025 11:59:36 +0200 Subject: [PATCH 383/472] Add users to apex machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They need to be able to login to apex to access any other machine from the SSF rack. 
Reviewed-by: Aleix Boné --- m/common/base/users.nix | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 92e7b13..3017aaf 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -56,7 +56,7 @@ home = "/home/Computational/rpenacob"; description = "Raúl Peñacoba"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "tent" "fox" ]; + hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ]; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" @@ -69,7 +69,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "hut" "tent" "raccoon" "fox" ]; + hosts = [ "apex" "hut" "tent" "raccoon" "fox" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -82,7 +82,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" "fox" ]; + hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" @@ -95,7 +95,7 @@ home = "/home/Computational/vlopez"; description = "Victor López"; group = "Computational"; - hosts = [ "koro" ]; + hosts = [ "apex" "koro" ]; hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; 
openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" @@ -108,7 +108,7 @@ home = "/home/Computational/dbautist"; description = "Dylan Bautista Cases"; group = "Computational"; - hosts = [ "hut" "tent" "raccoon" ]; + hosts = [ "apex" "hut" "tent" "raccoon" ]; hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" @@ -121,7 +121,7 @@ home = "/home/Computational/dalvare1"; description = "David Álvarez"; group = "Computational"; - hosts = [ "hut" "tent" "fox" ]; + hosts = [ "apex" "hut" "tent" "fox" ]; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" @@ -134,7 +134,7 @@ home = "/home/Computational/varcila"; description = "Vincent Arcila"; group = "Computational"; - hosts = [ "hut" "tent" "fox" ]; + hosts = [ "apex" "hut" "tent" "fox" ]; hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" -- 2.49.0 From 16ada096009f10b9d557ba36ab83cdd83afa9fdb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 10:22:04 +0200 Subject: [PATCH 384/472] Remove SSH proxy to access BSC clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have direct connection to them. 
Reviewed-by: Aleix Boné --- m/common/ssf.nix | 1 - m/common/ssf/ssh.nix | 8 -------- 2 files changed, 9 deletions(-) delete mode 100644 m/common/ssf/ssh.nix diff --git a/m/common/ssf.nix b/m/common/ssf.nix index 4638c54..60fbb04 100644 --- a/m/common/ssf.nix +++ b/m/common/ssf.nix @@ -5,6 +5,5 @@ ./ssf/fs.nix ./ssf/net.nix ./ssf/hosts.nix - ./ssf/ssh.nix ]; } diff --git a/m/common/ssf/ssh.nix b/m/common/ssf/ssh.nix deleted file mode 100644 index 86978f9..0000000 --- a/m/common/ssf/ssh.nix +++ /dev/null @@ -1,8 +0,0 @@ -{ - # Connect to intranet git hosts via proxy - programs.ssh.extraConfig = '' - # Connect to BSC machines via hut proxy too - Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es - ProxyCommand nc -X connect -x hut:23080 %h %p - ''; -} -- 2.49.0 From 2280635cd672a0ac5e9bbc4a21ce3e7fa1511938 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 10:35:38 +0200 Subject: [PATCH 385/472] Disable root_squash from NFS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows root to read files in the NFS export, so we can directly run `nixos-rebuild switch` from /home. 
Reviewed-by: Aleix Boné --- m/apex/nfs.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix index e245549..84d9555 100644 --- a/m/apex/nfs.nix +++ b/m/apex/nfs.nix @@ -7,7 +7,7 @@ mountdPort = 4002; statdPort = 4000; exports = '' - /home 10.0.40.0/24(rw,sync,no_subtree_check,root_squash) + /home 10.0.40.0/24(rw,sync,no_subtree_check,no_root_squash) ''; }; networking.firewall = { -- 2.49.0 From eae0c7cb59db5ecab9893fccff9681d5ee78c76e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 11:10:07 +0200 Subject: [PATCH 386/472] Make NFS mount async to improve latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't wait to flush writes, as we don't care about consistency on a crash: > This option allows the NFS server to violate the NFS protocol and > reply to requests before any changes made by that request have been > committed to stable storage (e.g. disc drive). > > Using this option usually improves performance, but at the cost that > an unclean server restart (i.e. a crash) can cause data to be lost or > corrupted. Reviewed-by: Aleix Boné --- m/apex/nfs.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix index 84d9555..353a936 100644 --- a/m/apex/nfs.nix +++ b/m/apex/nfs.nix @@ -7,7 +7,7 @@ mountdPort = 4002; statdPort = 4000; exports = '' - /home 10.0.40.0/24(rw,sync,no_subtree_check,no_root_squash) + /home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash) ''; }; networking.firewall = { -- 2.49.0 From b9f9cc7d7aa94218fdd3bc54507f688487dcbd51 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 11:33:04 +0200 Subject: [PATCH 387/472] Use IPv4 in blackbox probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise they simply fail as IPv6 doesn't work. 
Reviewed-by: Aleix Boné --- m/hut/blackbox.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index 96d72ae..38d1ac0 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -4,6 +4,7 @@ modules: timeout: 5s http: follow_redirects: true + preferred_ip_protocol: "ip4" valid_status_codes: [] # Defaults to 2xx method: GET http_with_proxy: -- 2.49.0 From 508059c99e17640a33c3b7d52ea19431c406a410 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 11:34:08 +0200 Subject: [PATCH 388/472] Remove unused blackbox configuration modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/blackbox.yml | 147 --------------------------------------------- 1 file changed, 147 deletions(-) diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml index 38d1ac0..a4c12d2 100644 --- a/m/hut/blackbox.yml +++ b/m/hut/blackbox.yml @@ -7,155 +7,8 @@ modules: preferred_ip_protocol: "ip4" valid_status_codes: [] # Defaults to 2xx method: GET - http_with_proxy: - prober: http - http: - proxy_url: "http://127.0.0.1:3128" - skip_resolve_phase_with_proxy: true - http_with_proxy_and_headers: - prober: http - http: - proxy_url: "http://127.0.0.1:3128" - proxy_connect_header: - Proxy-Authorization: - - Bearer token - http_post_2xx: - prober: http - timeout: 5s - http: - method: POST - headers: - Content-Type: application/json - body: '{}' - http_post_body_file: - prober: http - timeout: 5s - http: - method: POST - body_file: "/files/body.txt" - http_basic_auth_example: - prober: http - timeout: 5s - http: - method: POST - headers: - Host: "login.example.com" - basic_auth: - username: "username" - password: "mysecret" - http_2xx_oauth_client_credentials: - prober: http - timeout: 5s - http: - valid_http_versions: ["HTTP/1.1", "HTTP/2"] - follow_redirects: true - preferred_ip_protocol: "ip4" - valid_status_codes: - - 200 - - 201 - oauth2: - client_id: "client_id" - 
client_secret: "client_secret" - token_url: "https://api.example.com/token" - endpoint_params: - grant_type: "client_credentials" - http_custom_ca_example: - prober: http - http: - method: GET - tls_config: - ca_file: "/certs/my_cert.crt" - http_gzip: - prober: http - http: - method: GET - compression: gzip - http_gzip_with_accept_encoding: - prober: http - http: - method: GET - compression: gzip - headers: - Accept-Encoding: gzip - tls_connect: - prober: tcp - timeout: 5s - tcp: - tls: true - tcp_connect_example: - prober: tcp - timeout: 5s - imap_starttls: - prober: tcp - timeout: 5s - tcp: - query_response: - - expect: "OK.*STARTTLS" - - send: ". STARTTLS" - - expect: "OK" - - starttls: true - - send: ". capability" - - expect: "CAPABILITY IMAP4rev1" - smtp_starttls: - prober: tcp - timeout: 5s - tcp: - query_response: - - expect: "^220 ([^ ]+) ESMTP (.+)$" - - send: "EHLO prober\r" - - expect: "^250-STARTTLS" - - send: "STARTTLS\r" - - expect: "^220" - - starttls: true - - send: "EHLO prober\r" - - expect: "^250-AUTH" - - send: "QUIT\r" - irc_banner_example: - prober: tcp - timeout: 5s - tcp: - query_response: - - send: "NICK prober" - - send: "USER prober prober prober :prober" - - expect: "PING :([^ ]+)" - send: "PONG ${1}" - - expect: "^:[^ ]+ 001" icmp: prober: icmp timeout: 5s icmp: preferred_ip_protocol: "ip4" - dns_udp_example: - prober: dns - timeout: 5s - dns: - query_name: "www.prometheus.io" - query_type: "A" - valid_rcodes: - - NOERROR - validate_answer_rrs: - fail_if_matches_regexp: - - ".*127.0.0.1" - fail_if_all_match_regexp: - - ".*127.0.0.1" - fail_if_not_matches_regexp: - - "www.prometheus.io.\t300\tIN\tA\t127.0.0.1" - fail_if_none_matches_regexp: - - "127.0.0.1" - validate_authority_rrs: - fail_if_matches_regexp: - - ".*127.0.0.1" - validate_additional_rrs: - fail_if_matches_regexp: - - ".*127.0.0.1" - dns_soa: - prober: dns - dns: - query_name: "prometheus.io" - query_type: "SOA" - dns_tcp_example: - prober: dns - dns: - transport_protocol: 
"tcp" # defaults to "udp" - preferred_ip_protocol: "ip4" # defaults to "ip6" - query_name: "www.prometheus.io" -- 2.49.0 From 28db7799ea214d07683228c002d6a3c5698484f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 12:29:52 +0200 Subject: [PATCH 389/472] Add proxy configuration for internal hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access internal hosts via apex proxy. From the compute nodes we first open an SSH connection to apex, and then tunnel it through the HTTP proxy with netcat. This way we allow reaching internal GitLab repositories without requiring the user to have credentials in the remote host, while we can use multiple remotes to provide redundancy. Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 11 +++++++++++ m/common/ssf.nix | 3 ++- m/common/ssf/ssh.nix | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 m/common/ssf/ssh.nix diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index a36a965..0f59f8f 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -54,6 +54,17 @@ }; }; + # Use SSH tunnel to reach internal hosts + programs.ssh.extraConfig = '' + Host bscpm04.bsc.es gitlab-internal.bsc.es knights3.bsc.es + ProxyCommand nc -X connect -x localhost:23080 %h %p + Host raccoon + HostName knights3.bsc.es + ProxyCommand nc -X connect -x localhost:23080 %h %p + Host tent + ProxyJump raccoon + ''; + # Use tent for cache nix.settings = { extra-substituters = [ "https://jungle.bsc.es/cache" ]; diff --git a/m/common/ssf.nix b/m/common/ssf.nix index 60fbb04..8e8dc6b 100644 --- a/m/common/ssf.nix +++ b/m/common/ssf.nix @@ -3,7 +3,8 @@ imports = [ ./xeon.nix ./ssf/fs.nix - ./ssf/net.nix ./ssf/hosts.nix + ./ssf/net.nix + ./ssf/ssh.nix ]; } diff --git a/m/common/ssf/ssh.nix b/m/common/ssf/ssh.nix new file mode 100644 index 0000000..b73abd7 --- /dev/null +++ b/m/common/ssf/ssh.nix @@ -0,0 +1,16 @@ +{ + # Use SSH 
tunnel to apex to reach internal hosts + programs.ssh.extraConfig = '' + Host tent + ProxyJump raccoon + + # Access raccoon via the HTTP proxy + Host raccoon knights3.bsc.es + HostName knights3.bsc.es + ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p' + + # Make sure we can reach gitlab even if we don't have SSH access to raccoon + Host bscpm04.bsc.es gitlab-internal.bsc.es + ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p' + ''; +} -- 2.49.0 From 96661dd0d48fa64e6870b71ad12f8f433412ba48 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 16:12:44 +0200 Subject: [PATCH 390/472] Prevent accidental use of nftables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/net.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/m/common/base/net.nix b/m/common/base/net.nix index 9fb1599..3a64c78 100644 --- a/m/common/base/net.nix +++ b/m/common/base/net.nix @@ -1,4 +1,4 @@ -{ pkgs, ... }: +{ pkgs, lib, ... }: { networking = { @@ -10,6 +10,9 @@ allowedTCPPorts = [ 22 ]; }; + # Make sure we use iptables + nftables.enable = lib.mkForce false; + hosts = { "84.88.53.236" = [ "apex" "ssfhead.bsc.es" "ssfhead" ]; "84.88.51.152" = [ "raccoon" ]; -- 2.49.0 From 570c6e175db5aa3db2ae3c83da7f4572bc1d5f97 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 11 Jul 2025 16:13:35 +0200 Subject: [PATCH 391/472] Remove extra flush commands on firewall stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They are not needed as they are already flushed when the firewall starts or stops. 
Reviewed-by: Aleix Boné --- m/apex/nfs.nix | 5 ----- 1 file changed, 5 deletions(-) diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix index 353a936..b1668c1 100644 --- a/m/apex/nfs.nix +++ b/m/apex/nfs.nix @@ -28,10 +28,5 @@ iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept ''; - # Flush all rules and chains on stop so it won't break on start - extraStopCommands = '' - iptables -F - iptables -X - ''; }; } -- 2.49.0 From bd94c4ad00ccede2804a4f3283584f2ed79a737a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 15:07:52 +0200 Subject: [PATCH 392/472] Add weasel machine configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 1 + keys.nix | 23 ++++++++++++----------- m/common/base/users.nix | 4 ++-- m/common/ssf/hosts.nix | 2 +- m/map.nix | 2 +- m/weasel/configuration.nix | 28 ++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 m/weasel/configuration.nix diff --git a/flake.nix b/flake.nix index b8352a9..a07a184 100644 --- a/flake.nix +++ b/flake.nix @@ -28,6 +28,7 @@ in raccoon = mkConf "raccoon"; fox = mkConf "fox"; apex = mkConf "apex"; + weasel = mkConf "weasel"; }; packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { diff --git a/keys.nix b/keys.nix index 6971267..6fbb78a 100644 --- a/keys.nix +++ b/keys.nix @@ -2,22 +2,23 @@ # here all the public keys rec { hosts = { - hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; - owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; - owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; - eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; - koro = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; - bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; - lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; - fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; - tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; - apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; + owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; + owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; + eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; + koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; + bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; + lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; + tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; + apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; + weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel"; }; hostGroup = with hosts; rec { untrusted = [ fox ]; compute = [ owl1 owl2 ]; - playground = [ eudy koro ]; + playground = [ eudy koro weasel ]; storage = [ bay lake2 ]; monitor = [ hut ]; login = [ apex ]; diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 3017aaf..c16ce48 100644 --- a/m/common/base/users.nix +++ 
b/m/common/base/users.nix @@ -69,7 +69,7 @@ home = "/home/Computational/anavarro"; description = "Antoni Navarro"; group = "Computational"; - hosts = [ "apex" "hut" "tent" "raccoon" "fox" ]; + hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ]; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" @@ -82,7 +82,7 @@ home = "/home/Computational/abonerib"; description = "Aleix Boné"; group = "Computational"; - hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" ]; + hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ]; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" diff --git a/m/common/ssf/hosts.nix b/m/common/ssf/hosts.nix index 039b039..cd99eb9 100644 --- a/m/common/ssf/hosts.nix +++ b/m/common/ssf/hosts.nix @@ -16,7 +16,7 @@ "10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ]; #"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ]; "10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ]; - "10.0.40.6" = [ "xeon06" ]; "10.0.42.6" = [ "xeon06-ib" ]; "10.0.40.106" = [ "xeon06-ipmi" ]; + "10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ]; "10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ]; "10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ]; }; diff --git a/m/map.nix b/m/map.nix index fc6125c..7ebd0bb 100644 --- a/m/map.nix +++ b/m/map.nix @@ -19,7 +19,7 @@ xeon03 = { pos=33; size=1; label="SSF-XEON03"; 
board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; }; # Slot 34 empty koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; }; - xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; }; + weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; }; hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; }; eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; }; diff --git a/m/weasel/configuration.nix b/m/weasel/configuration.nix new file mode 100644 index 0000000..8a92f9c --- /dev/null +++ b/m/weasel/configuration.nix @@ -0,0 +1,28 @@ +{ lib, ... }: + +{ + imports = [ + ../common/ssf.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca"; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + # Users with sudo access + users.groups.wheel.members = [ "abonerib" "anavarro" ]; + + networking = { + hostName = "weasel"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.6"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.6"; + prefixLength = 24; + } ]; + }; +} -- 2.49.0 From b802f88df92dadfade0fae47ac84d68e279ef199 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:15:59 +0200 Subject: [PATCH 393/472] Rotate anavarro password and SSH key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index c16ce48..b6b2619 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix 
@@ -70,9 +70,9 @@ description = "Antoni Navarro"; group = "Computational"; hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ]; - hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; + hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1"; openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni" ]; }; -- 2.49.0 From 7379e84e79f955ba5776e9ca147558adfc9767d0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:30:20 +0200 Subject: [PATCH 394/472] Silently ban OpenVAS BSC scanner from apex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is spamming our logs with refused connection lines: apex% sudo journalctl -b0 | grep 'refused connection.*SRC=192.168.8.16' | wc -l 13945 Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 0f59f8f..59f21d7 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -65,6 +65,14 @@ ProxyJump raccoon ''; + networking.firewall = { + extraCommands = '' + # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our + # logs. Insert as first position so we also protect SSH. 
+ iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse + ''; + }; + # Use tent for cache nix.settings = { extra-substituters = [ "https://jungle.bsc.es/cache" ]; -- 2.49.0 From 37c12783bb1bc969c7b2dc8364321bff35795848 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:45:40 +0200 Subject: [PATCH 395/472] Upgrade nixpkgs to nixos 25.05 Reviewed-by: Aleix Roca Nonell --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index a07a184..fce9db1 100644 --- a/flake.nix +++ b/flake.nix @@ -1,6 +1,6 @@ { inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; -- 2.49.0 From 380abe995768dd36ebb4bf5537c821414c4de879 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:46:48 +0200 Subject: [PATCH 396/472] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'agenix': 'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41?narHash=sha256-b%2Buqzj%2BWa6xgMS9aNbX4I%2BsXeb5biPDi39VgvSFqFvU%3D' (2024-08-10) → 'github:ryantm/agenix/531beac616433bac6f9e2a19feb8e99a22a66baf?narHash=sha256-9P1FziAwl5%2B3edkfFcr5HeGtQUtrSdk/MksX39GieoA%3D' (2025-06-17) • Updated input 'agenix/darwin': 'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d?narHash=sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0%3D' (2023-11-24) → 'github:lnl7/nix-darwin/43975d782b418ebf4969e9ccba82466728c2851b?narHash=sha256-dyN%2BteG9G82G%2Bm%2BPX/aSAagkC%2BvUv0SgUw3XkPhQodQ%3D' (2025-04-12) • Updated input 'agenix/home-manager': 'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1?narHash=sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE%3D' (2023-12-20) → 
'github:nix-community/home-manager/abfad3d2958c9e6300a883bd443512c55dfeb1be?narHash=sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs%3D' (2025-04-24) • Updated input 'bscpkgs': 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29) → 'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=9d1944c658929b6f98b3f3803fead4d1b91c4405' (2025-06-11) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc?narHash=sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8%3D' (2025-01-14) → 'github:NixOS/nixpkgs/dfcd5b901dbab46c9c6e80b265648481aafb01f8?narHash=sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw%3D' (2025-07-13) Reviewed-by: Aleix Roca Nonell --- flake.lock | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/flake.lock b/flake.lock index 9d72e48..30ed5ac 100644 --- a/flake.lock +++ b/flake.lock @@ -10,11 +10,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1723293904, - "narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=", + "lastModified": 1750173260, + "narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=", "owner": "ryantm", "repo": "agenix", - "rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41", + "rev": "531beac616433bac6f9e2a19feb8e99a22a66baf", "type": "github" }, "original": { @@ -30,11 +30,11 @@ ] }, "locked": { - "lastModified": 1732868163, - "narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=", + "lastModified": 1749650500, + "narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=", "ref": "refs/heads/master", - "rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f", - "revCount": 952, + "rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405", + "revCount": 961, "type": "git", "url": "https://git.sr.ht/~rodarima/bscpkgs" }, @@ -51,11 +51,11 @@ ] }, "locked": { - "lastModified": 1700795494, - "narHash": 
"sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=", + "lastModified": 1744478979, + "narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=", "owner": "lnl7", "repo": "nix-darwin", - "rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d", + "rev": "43975d782b418ebf4969e9ccba82466728c2851b", "type": "github" }, "original": { @@ -73,11 +73,11 @@ ] }, "locked": { - "lastModified": 1703113217, - "narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=", + "lastModified": 1745494811, + "narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=", "owner": "nix-community", "repo": "home-manager", - "rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1", + "rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be", "type": "github" }, "original": { @@ -88,16 +88,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1736867362, - "narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=", + "lastModified": 1752436162, + "narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc", + "rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-24.11", + "ref": "nixos-25.05", "repo": "nixpkgs", "type": "github" } -- 2.49.0 From a1b387e454fa1560f8dc3600d843a3b9295d71dc Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:50:12 +0200 Subject: [PATCH 397/472] Remove package ix as it is gone Fails with: "error: ix has been removed from Nixpkgs, as the ix.io pastebin has been offline since Dec. 2023". 
Reviewed-by: Aleix Roca Nonell --- m/common/base/env.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/env.nix b/m/common/base/env.nix index e974a6c..e553050 100644 --- a/m/common/base/env.nix +++ b/m/common/base/env.nix @@ -3,7 +3,7 @@ { environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option - nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree + nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree ncdu config.boot.kernelPackages.perf ldns pv # From bsckgs overlay osumb -- 2.49.0 From 0e37ab5fe15340ad52d3f900aaeffebfa924176a Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 17:57:22 +0200 Subject: [PATCH 398/472] Remove merged MPICH patch Reviewed-by: Aleix Roca Nonell --- pkgs/mpich-fix-hwtopo.patch | 36 ------------------------------------ pkgs/overlay.nix | 4 ---- 2 files changed, 40 deletions(-) delete mode 100644 pkgs/mpich-fix-hwtopo.patch diff --git a/pkgs/mpich-fix-hwtopo.patch b/pkgs/mpich-fix-hwtopo.patch deleted file mode 100644 index 11146d5..0000000 --- a/pkgs/mpich-fix-hwtopo.patch +++ /dev/null @@ -1,36 +0,0 @@ -diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c -index 33e88bc..ee3641c 100644 ---- a/src/util/mpir_hwtopo.c -+++ b/src/util/mpir_hwtopo.c -@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void) - #ifdef HAVE_HWLOC - bindset = hwloc_bitmap_alloc(); - hwloc_topology_init(&hwloc_topology); -- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile"); -- if (xmlfile != NULL) { -- int rc; -- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile); -- if (rc == 0) { -- /* To have hwloc still actually call OS-specific hooks, the -- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded -- * file is really the underlying system. 
*/ -- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM); -- } -- MPL_free(xmlfile); -- } - - hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL); - if (!hwloc_topology_load(hwloc_topology)) - ---- a/src/mpi/init/local_proc_attrs.c -+++ b/src/mpi/init/local_proc_attrs.c -@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required) - /* Set the number of tag bits. The device may override this value. */ - MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT; - -- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds"); -- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds); -- MPL_free(requested_kinds); -- - return mpi_errno; - } diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 2a1df4a..7db9941 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -11,10 +11,6 @@ final: prev: paths = [ pmix.dev pmix.out ]; }; in prev.mpich.overrideAttrs (old: { - patches = (old.patches or []) ++ [ - # See https://github.com/pmodels/mpich/issues/6946 - ./mpich-fix-hwtopo.patch - ]; buildInput = old.buildInputs ++ [ libfabric pmixAll -- 2.49.0 From fc9fcd602ac60012dcb41414834180ceafba0a1d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 15 Jul 2025 18:48:08 +0200 Subject: [PATCH 399/472] Update weasel IPMI hostname for monitoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/targets.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/hut/targets.yml b/m/hut/targets.yml index c04f864..fc4c72d 100644 --- a/m/hut/targets.yml +++ b/m/hut/targets.yml @@ -4,7 +4,7 @@ - xeon03-ipmi - xeon04-ipmi - koro-ipmi - - xeon06-ipmi + - weasel-ipmi - hut-ipmi - eudy-ipmi # Storage -- 2.49.0 From 2ef32f773c6ffb40420661dc13a83448756745ab Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 16 Jul 2025 16:59:29 +0200 Subject: [PATCH 400/472] Ban another scanner trying to 
connect via SSH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is constantly spamming our logs: apex# journalctl | grep 'Connection closed by 84.88.52.176' | wc -l 2255 Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 59f21d7..540e507 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -70,6 +70,8 @@ # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our # logs. Insert as first position so we also protect SSH. iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse + # Same with opsmonweb01.bsc.es which seems to be trying to access via SSH + iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse ''; }; -- 2.49.0 From c26ec1b6f11a9114b443a34de18515cf2fe190f1 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Jul 2025 11:26:27 +0200 Subject: [PATCH 401/472] Remove option allowUnfree from fox and raccoon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is already set to true for all machines. 
Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 1 - m/raccoon/configuration.nix | 1 - 2 files changed, 2 deletions(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 3fb9010..860be21 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -56,7 +56,6 @@ # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.graphics.enable = true; - nixpkgs.config.allowUnfree = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 5f75f99..4a8a0ec 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -52,7 +52,6 @@ # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.graphics.enable = true; - nixpkgs.config.allowUnfree = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; -- 2.49.0 From 750504744fc1c28513f8369836d0eeb5a2594cf0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 17 Jul 2025 11:32:35 +0200 Subject: [PATCH 402/472] Enable open source NVidia driver in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is recommended for newer versions. 
Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 860be21..f1d967a 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -55,6 +55,7 @@ # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.nvidia.open = true; hardware.graphics.enable = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; -- 2.49.0 From 6c1109303399e0c67ad0ddd41556628d3e5079ac Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 18 Jul 2025 13:00:03 +0200 Subject: [PATCH 403/472] Select proprietary NVIDIA driver in raccoon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NVIDIA GTX 960 from 2016 has the Maxwell architecture, and NixOS suggests using the proprietary driver for older than Turing: > It is suggested to use the open source kernel modules on Turing or > later GPUs (RTX series, GTX 16xx), and the closed source modules > otherwise. 
Reviewed-by: Aleix Boné --- m/raccoon/configuration.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 4a8a0ec..c916c02 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -51,6 +51,7 @@ # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.nvidia.open = false; # Maxwell is older than Turing architecture hardware.graphics.enable = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; -- 2.49.0 From 4a5787e0c62e853fba042e5a0ff46d20de490107 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 18 Jul 2025 13:43:58 +0200 Subject: [PATCH 404/472] Enable automatic Nix GC in raccoon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/raccoon/configuration.nix | 3 --- 1 file changed, 3 deletions(-) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index c916c02..cee936a 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -56,9 +56,6 @@ nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; - # Disable garbage collection for now - nix.gc.automatic = lib.mkForce false; - services.openssh.settings.X11Forwarding = true; services.prometheus.exporters.node = { -- 2.49.0 From ce8b05b142ce6af45d8cb085e8ebd5981e18c6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Fri, 18 Jul 2025 10:59:39 +0200 Subject: [PATCH 405/472] Replace xeon07 by hut in ssh config The xeon07 machine has been renamed to hut. 
Reviewed-by: Rodrigo Arias Mallo --- m/module/ssh-hut-extern.nix | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/m/module/ssh-hut-extern.nix b/m/module/ssh-hut-extern.nix index 5eeafb3..cc6b26a 100644 --- a/m/module/ssh-hut-extern.nix +++ b/m/module/ssh-hut-extern.nix @@ -1,9 +1,8 @@ { programs.ssh.extraConfig = '' - Host ssfhead + Host apex ssfhead HostName ssflogin.bsc.es Host hut - ProxyJump ssfhead - HostName xeon07 + ProxyJump apex ''; } -- 2.49.0 From f3733418b2122602fa271ffa628beff094995e54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Fri, 18 Jul 2025 11:31:59 +0200 Subject: [PATCH 406/472] Move shared nvidia settings to a separate module Reviewed-by: Rodrigo Arias Mallo --- m/fox/configuration.nix | 7 ++----- m/module/nvidia.nix | 9 +++++++++ m/raccoon/configuration.nix | 6 +----- 3 files changed, 12 insertions(+), 10 deletions(-) create mode 100644 m/module/nvidia.nix diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index f1d967a..fd5cdc8 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -5,6 +5,7 @@ ../common/base.nix ../common/xeon/console.nix ../module/emulation.nix + ../module/nvidia.nix ]; # Select the this using the ID to avoid mismatches @@ -53,12 +54,8 @@ extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; }; - # Configure Nvidia driver to use with CUDA - hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + # Recommended for new graphics cards hardware.nvidia.open = true; - hardware.graphics.enable = true; - nixpkgs.config.nvidia.acceptLicense = true; - services.xserver.videoDrivers = [ "nvidia" ]; # Mount NVME disks fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; diff --git a/m/module/nvidia.nix b/m/module/nvidia.nix new file mode 100644 index 0000000..3db4071 --- /dev/null +++ b/m/module/nvidia.nix @@ -0,0 +1,9 @@ +{ lib, config, ... 
}: +{ + # Configure Nvidia driver to use with CUDA + hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set"); + hardware.graphics.enable = true; + nixpkgs.config.nvidia.acceptLicense = true; + services.xserver.videoDrivers = [ "nvidia" ]; +} diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index cee936a..46f63f5 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -6,6 +6,7 @@ ../module/emulation.nix ../module/debuginfod.nix ../module/ssh-hut-extern.nix + ../module/nvidia.nix ../eudy/kernel/perf.nix ]; @@ -49,12 +50,7 @@ # Enable performance governor powerManagement.cpuFreqGovernor = "performance"; - # Configure Nvidia driver to use with CUDA - hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; hardware.nvidia.open = false; # Maxwell is older than Turing architecture - hardware.graphics.enable = true; - nixpkgs.config.nvidia.acceptLicense = true; - services.xserver.videoDrivers = [ "nvidia" ]; services.openssh.settings.X11Forwarding = true; -- 2.49.0 From 8d984a0672f46ec2e77a8ca760f0be3d35da6ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Fri, 18 Jul 2025 11:34:28 +0200 Subject: [PATCH 407/472] Enable cuda systemFeature in raccoon and fox This allows running derivations which depend on cuda runtime without breaking the sandbox. We only need to add `requiredSystemFeatures = [ "cuda" ];` to the derivation. 
Reviewed-by: Rodrigo Arias Mallo --- m/module/nvidia.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/module/nvidia.nix b/m/module/nvidia.nix index 3db4071..6de164f 100644 --- a/m/module/nvidia.nix +++ b/m/module/nvidia.nix @@ -6,4 +6,9 @@ hardware.graphics.enable = true; nixpkgs.config.nvidia.acceptLicense = true; services.xserver.videoDrivers = [ "nvidia" ]; + + # enable support for derivations which require nvidia-gpu to be available + # > requiredSystemFeatures = [ "cuda" ]; + programs.nix-required-mounts.enable = true; + programs.nix-required-mounts.presets.nvidia-gpu.enable = true; } -- 2.49.0 From f2d8ee85526a0d460598b8dd2c65c76cfb4dfc8c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 21 Jul 2025 17:19:25 +0200 Subject: [PATCH 408/472] Add missing symlink in cuda sandbox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/nvidia.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/module/nvidia.nix b/m/module/nvidia.nix index 6de164f..a41b112 100644 --- a/m/module/nvidia.nix +++ b/m/module/nvidia.nix @@ -11,4 +11,8 @@ # > requiredSystemFeatures = [ "cuda" ]; programs.nix-required-mounts.enable = true; programs.nix-required-mounts.presets.nvidia-gpu.enable = true; + # They forgot to add the symlink + programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [ + config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument + ]; } -- 2.49.0 From 3269d763aac07cdee4d33b1d44c8ab7066882fc2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 22 Jul 2025 15:24:55 +0200 Subject: [PATCH 409/472] Add cudainfo program to test CUDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cudainfo program checks that we can initialize the CUDA RT library and communicate with the driver. 
It can be used as standalone program or built with cudainfo.gpuCheck so it is executed inside the build sandbox to see if it also works fine. It uses the autoAddDriverRunpath hook to inject in the runpath the location of the library directory for CUDA libraries. Reviewed-by: Aleix Boné --- m/module/nvidia.nix | 4 +- pkgs/cudainfo/Makefile | 12 + pkgs/cudainfo/cudainfo.cpp | 600 +++++++++++++++++++++++++++++++++++++ pkgs/cudainfo/default.nix | 43 +++ pkgs/overlay.nix | 1 + 5 files changed, 659 insertions(+), 1 deletion(-) create mode 100644 pkgs/cudainfo/Makefile create mode 100644 pkgs/cudainfo/cudainfo.cpp create mode 100644 pkgs/cudainfo/default.nix diff --git a/m/module/nvidia.nix b/m/module/nvidia.nix index a41b112..baebc42 100644 --- a/m/module/nvidia.nix +++ b/m/module/nvidia.nix @@ -1,4 +1,4 @@ -{ lib, config, ... }: +{ lib, config, pkgs, ... }: { # Configure Nvidia driver to use with CUDA hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; @@ -15,4 +15,6 @@ programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [ config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument ]; + + environment.systemPackages = [ pkgs.cudainfo ]; } diff --git a/pkgs/cudainfo/Makefile b/pkgs/cudainfo/Makefile new file mode 100644 index 0000000..5990eba --- /dev/null +++ b/pkgs/cudainfo/Makefile @@ -0,0 +1,12 @@ +HOSTCXX ?= g++ +NVCC := nvcc -ccbin $(HOSTCXX) +CXXFLAGS := -m64 + +# Target rules +all: cudainfo + +cudainfo: cudainfo.cpp + $(NVCC) $(CXXFLAGS) -o $@ $< + +clean: + rm -f cudainfo cudainfo.o diff --git a/pkgs/cudainfo/cudainfo.cpp b/pkgs/cudainfo/cudainfo.cpp new file mode 100644 index 0000000..815500b --- /dev/null +++ b/pkgs/cudainfo/cudainfo.cpp @@ -0,0 +1,600 @@ +/* + * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. 
Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ +/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */ + +// Shared Utilities (QA Testing) + +// std::system includes +#include +#include + +#include + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) + +// CUDA Runtime error messages +#ifdef __DRIVER_TYPES_H__ +static const char *_cudaGetErrorEnum(cudaError_t error) +{ + switch (error) + { + case cudaSuccess: + return "cudaSuccess"; + + case cudaErrorMissingConfiguration: + return "cudaErrorMissingConfiguration"; + + case cudaErrorMemoryAllocation: + return "cudaErrorMemoryAllocation"; + + case cudaErrorInitializationError: + return "cudaErrorInitializationError"; + + case cudaErrorLaunchFailure: + return "cudaErrorLaunchFailure"; + + case cudaErrorPriorLaunchFailure: + return "cudaErrorPriorLaunchFailure"; + + case cudaErrorLaunchTimeout: + return "cudaErrorLaunchTimeout"; + + case cudaErrorLaunchOutOfResources: + return "cudaErrorLaunchOutOfResources"; + + case cudaErrorInvalidDeviceFunction: + return "cudaErrorInvalidDeviceFunction"; + + case cudaErrorInvalidConfiguration: + return "cudaErrorInvalidConfiguration"; + + case cudaErrorInvalidDevice: + return "cudaErrorInvalidDevice"; + + case cudaErrorInvalidValue: + return "cudaErrorInvalidValue"; + + case cudaErrorInvalidPitchValue: + return "cudaErrorInvalidPitchValue"; + + case cudaErrorInvalidSymbol: + return "cudaErrorInvalidSymbol"; + + case cudaErrorMapBufferObjectFailed: + return "cudaErrorMapBufferObjectFailed"; + + case cudaErrorUnmapBufferObjectFailed: + return "cudaErrorUnmapBufferObjectFailed"; + + case cudaErrorInvalidHostPointer: + return "cudaErrorInvalidHostPointer"; + + case cudaErrorInvalidDevicePointer: + 
return "cudaErrorInvalidDevicePointer"; + + case cudaErrorInvalidTexture: + return "cudaErrorInvalidTexture"; + + case cudaErrorInvalidTextureBinding: + return "cudaErrorInvalidTextureBinding"; + + case cudaErrorInvalidChannelDescriptor: + return "cudaErrorInvalidChannelDescriptor"; + + case cudaErrorInvalidMemcpyDirection: + return "cudaErrorInvalidMemcpyDirection"; + + case cudaErrorAddressOfConstant: + return "cudaErrorAddressOfConstant"; + + case cudaErrorTextureFetchFailed: + return "cudaErrorTextureFetchFailed"; + + case cudaErrorTextureNotBound: + return "cudaErrorTextureNotBound"; + + case cudaErrorSynchronizationError: + return "cudaErrorSynchronizationError"; + + case cudaErrorInvalidFilterSetting: + return "cudaErrorInvalidFilterSetting"; + + case cudaErrorInvalidNormSetting: + return "cudaErrorInvalidNormSetting"; + + case cudaErrorMixedDeviceExecution: + return "cudaErrorMixedDeviceExecution"; + + case cudaErrorCudartUnloading: + return "cudaErrorCudartUnloading"; + + case cudaErrorUnknown: + return "cudaErrorUnknown"; + + case cudaErrorNotYetImplemented: + return "cudaErrorNotYetImplemented"; + + case cudaErrorMemoryValueTooLarge: + return "cudaErrorMemoryValueTooLarge"; + + case cudaErrorInvalidResourceHandle: + return "cudaErrorInvalidResourceHandle"; + + case cudaErrorNotReady: + return "cudaErrorNotReady"; + + case cudaErrorInsufficientDriver: + return "cudaErrorInsufficientDriver"; + + case cudaErrorSetOnActiveProcess: + return "cudaErrorSetOnActiveProcess"; + + case cudaErrorInvalidSurface: + return "cudaErrorInvalidSurface"; + + case cudaErrorNoDevice: + return "cudaErrorNoDevice"; + + case cudaErrorECCUncorrectable: + return "cudaErrorECCUncorrectable"; + + case cudaErrorSharedObjectSymbolNotFound: + return "cudaErrorSharedObjectSymbolNotFound"; + + case cudaErrorSharedObjectInitFailed: + return "cudaErrorSharedObjectInitFailed"; + + case cudaErrorUnsupportedLimit: + return "cudaErrorUnsupportedLimit"; + + case cudaErrorDuplicateVariableName: 
+ return "cudaErrorDuplicateVariableName"; + + case cudaErrorDuplicateTextureName: + return "cudaErrorDuplicateTextureName"; + + case cudaErrorDuplicateSurfaceName: + return "cudaErrorDuplicateSurfaceName"; + + case cudaErrorDevicesUnavailable: + return "cudaErrorDevicesUnavailable"; + + case cudaErrorInvalidKernelImage: + return "cudaErrorInvalidKernelImage"; + + case cudaErrorNoKernelImageForDevice: + return "cudaErrorNoKernelImageForDevice"; + + case cudaErrorIncompatibleDriverContext: + return "cudaErrorIncompatibleDriverContext"; + + case cudaErrorPeerAccessAlreadyEnabled: + return "cudaErrorPeerAccessAlreadyEnabled"; + + case cudaErrorPeerAccessNotEnabled: + return "cudaErrorPeerAccessNotEnabled"; + + case cudaErrorDeviceAlreadyInUse: + return "cudaErrorDeviceAlreadyInUse"; + + case cudaErrorProfilerDisabled: + return "cudaErrorProfilerDisabled"; + + case cudaErrorProfilerNotInitialized: + return "cudaErrorProfilerNotInitialized"; + + case cudaErrorProfilerAlreadyStarted: + return "cudaErrorProfilerAlreadyStarted"; + + case cudaErrorProfilerAlreadyStopped: + return "cudaErrorProfilerAlreadyStopped"; + + /* Since CUDA 4.0*/ + case cudaErrorAssert: + return "cudaErrorAssert"; + + case cudaErrorTooManyPeers: + return "cudaErrorTooManyPeers"; + + case cudaErrorHostMemoryAlreadyRegistered: + return "cudaErrorHostMemoryAlreadyRegistered"; + + case cudaErrorHostMemoryNotRegistered: + return "cudaErrorHostMemoryNotRegistered"; + + /* Since CUDA 5.0 */ + case cudaErrorOperatingSystem: + return "cudaErrorOperatingSystem"; + + case cudaErrorPeerAccessUnsupported: + return "cudaErrorPeerAccessUnsupported"; + + case cudaErrorLaunchMaxDepthExceeded: + return "cudaErrorLaunchMaxDepthExceeded"; + + case cudaErrorLaunchFileScopedTex: + return "cudaErrorLaunchFileScopedTex"; + + case cudaErrorLaunchFileScopedSurf: + return "cudaErrorLaunchFileScopedSurf"; + + case cudaErrorSyncDepthExceeded: + return "cudaErrorSyncDepthExceeded"; + + case cudaErrorLaunchPendingCountExceeded: + 
return "cudaErrorLaunchPendingCountExceeded"; + + case cudaErrorNotPermitted: + return "cudaErrorNotPermitted"; + + case cudaErrorNotSupported: + return "cudaErrorNotSupported"; + + /* Since CUDA 6.0 */ + case cudaErrorHardwareStackError: + return "cudaErrorHardwareStackError"; + + case cudaErrorIllegalInstruction: + return "cudaErrorIllegalInstruction"; + + case cudaErrorMisalignedAddress: + return "cudaErrorMisalignedAddress"; + + case cudaErrorInvalidAddressSpace: + return "cudaErrorInvalidAddressSpace"; + + case cudaErrorInvalidPc: + return "cudaErrorInvalidPc"; + + case cudaErrorIllegalAddress: + return "cudaErrorIllegalAddress"; + + /* Since CUDA 6.5*/ + case cudaErrorInvalidPtx: + return "cudaErrorInvalidPtx"; + + case cudaErrorInvalidGraphicsContext: + return "cudaErrorInvalidGraphicsContext"; + + case cudaErrorStartupFailure: + return "cudaErrorStartupFailure"; + + case cudaErrorApiFailureBase: + return "cudaErrorApiFailureBase"; + } + + return ""; +} +#endif + +template< typename T > +void check(T result, char const *const func, const char *const file, int const line) +{ + if (result) + { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", + file, line, static_cast(result), _cudaGetErrorEnum(result), func); + cudaDeviceReset(); + // Make sure we call CUDA Device Reset before exiting + exit(EXIT_FAILURE); + } +} + +int *pArgc = NULL; +char **pArgv = NULL; + +#if CUDART_VERSION < 5000 + +// CUDA-C includes +#include + +// This function wraps the CUDA Driver API into a template function +template +inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) +{ + CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device); + + if (CUDA_SUCCESS != error) { + fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n", + error, __FILE__, __LINE__); + + // cudaDeviceReset causes the driver to clean up all state. While + // not mandatory in normal operation, it is good practice. 
It is also + // needed to ensure correct operation when the application is being + // profiled. Calling cudaDeviceReset causes all profile data to be + // flushed before the application exits + cudaDeviceReset(); + exit(EXIT_FAILURE); + } +} + +#endif /* CUDART_VERSION < 5000 */ + +// Beginning of GPU Architecture definitions +inline int ConvertSMVer2Cores(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM) + typedef struct { + int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = { + { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class + { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class + { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class + { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class + { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class + { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class + { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class + { -1, -1 } + }; + + int index = 0; + + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { + return nGpuArchCoresPerSM[index].Cores; + } + + index++; + } + + // If we don't find the values, we default use the previous one to run properly + printf("MapSMtoCores for SM %d.%d is undefined.
Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); + return nGpuArchCoresPerSM[index-1].Cores; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main(int argc, char **argv) +{ + pArgc = &argc; + pArgv = argv; + + printf("%s Starting...\n\n", argv[0]); + printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n"); + + int deviceCount = 0; + cudaError_t error_id = cudaGetDeviceCount(&deviceCount); + + if (error_id != cudaSuccess) { + printf("cudaGetDeviceCount failed: %s (%d)\n", + cudaGetErrorString(error_id), (int) error_id); + printf("Result = FAIL\n"); + exit(EXIT_FAILURE); + } + + // This function call returns 0 if there are no CUDA capable devices. + if (deviceCount == 0) + printf("There are no available device(s) that support CUDA\n"); + else + printf("Detected %d CUDA Capable device(s)\n", deviceCount); + + int dev, driverVersion = 0, runtimeVersion = 0; + + for (dev = 0; dev < deviceCount; ++dev) { + cudaSetDevice(dev); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, dev); + + printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); + + // Console log + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10); + printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor); + + printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", + (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem); + + printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n", + deviceProp.multiProcessorCount, + ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), + 
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); + printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); + + +#if CUDART_VERSION >= 5000 + // This is supported in CUDA 5.0 (runtime API device properties) + printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); + printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); + + if (deviceProp.l2CacheSize) { + printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize); + } + +#else + // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API) + int memoryClock; + getCudaAttribute(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev); + printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f); + int memBusWidth; + getCudaAttribute(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev); + printf(" Memory Bus Width: %d-bit\n", memBusWidth); + int L2CacheSize; + getCudaAttribute(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev); + + if (L2CacheSize) { + printf(" L2 Cache Size: %d bytes\n", L2CacheSize); + } + +#endif + + printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n", + deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1], + deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); + printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", + deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); + printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n", + deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]); + + + printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem); + printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock); + printf(" Total number of registers 
available per block: %d\n", deviceProp.regsPerBlock); + printf(" Warp size: %d\n", deviceProp.warpSize); + printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor); + printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock); + printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", + deviceProp.maxThreadsDim[0], + deviceProp.maxThreadsDim[1], + deviceProp.maxThreadsDim[2]); + printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", + deviceProp.maxGridSize[0], + deviceProp.maxGridSize[1], + deviceProp.maxGridSize[2]); + printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch); + printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment); + printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); + printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); + printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); + printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); + printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled"); +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)"); +#endif + printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? 
"Yes" : "No"); + printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); + + const char *sComputeMode[] = { + "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", + "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", + "Prohibited (no host thread can use ::cudaSetDevice() with this device)", + "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", + "Unknown", + NULL + }; + printf(" Compute Mode:\n"); + printf(" < %s >\n", sComputeMode[deviceProp.computeMode]); + } + + // If there are 2 or more GPUs, query to determine whether RDMA is supported + if (deviceCount >= 2) + { + cudaDeviceProp prop[64]; + int gpuid[64]; // we want to find the first two GPU's that can support P2P + int gpu_p2p_count = 0; + + for (int i=0; i < deviceCount; i++) + { + checkCudaErrors(cudaGetDeviceProperties(&prop[i], i)); + + // Only boards based on Fermi or later can support P2P + if ((prop[i].major >= 2) +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this + && prop[i].tccDriver +#endif + ) + { + // This is an array of P2P capable GPUs + gpuid[gpu_p2p_count++] = i; + } + } + + // Show all the combinations of support P2P GPUs + int can_access_peer_0_1, can_access_peer_1_0; + + if (gpu_p2p_count >= 2) + { + for (int i = 0; i < gpu_p2p_count-1; i++) + { + for (int j = 1; j < gpu_p2p_count; j++) + { + checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j])); + printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i], + prop[gpuid[j]].name, gpuid[j] , + can_access_peer_0_1 ? 
"Yes" : "No"); + } + } + + for (int j = 1; j < gpu_p2p_count; j++) + { + for (int i = 0; i < gpu_p2p_count-1; i++) + { + checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i])); + printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j], + prop[gpuid[i]].name, gpuid[i] , + can_access_peer_1_0 ? "Yes" : "No"); + } + } + } + } + + // csv masterlog info + // ***************************** + // exe and CUDA driver name + printf("\n"); + std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; + char cTemp[128]; + + // driver version + sProfileString += ", CUDA Driver Version = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10); +#else + sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10); +#endif + sProfileString += cTemp; + + // Runtime version + sProfileString += ", CUDA Runtime Version = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); +#else + sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); +#endif + sProfileString += cTemp; + + // Device count + sProfileString += ", NumDevs = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d", deviceCount); +#else + sprintf(cTemp, "%d", deviceCount); +#endif + sProfileString += cTemp; + + // Print Out all device Names + for (dev = 0; dev < deviceCount; ++dev) + { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 13, ", Device%d = ", dev); +#else + sprintf(cTemp, ", Device%d = ", dev); +#endif + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, dev); + sProfileString += cTemp; + sProfileString += deviceProp.name; + } + + sProfileString += "\n"; + printf("%s", sProfileString.c_str()); + + printf("Result 
= PASS\n"); + + // finish + // cudaDeviceReset causes the driver to clean up all state. While + // not mandatory in normal operation, it is good practice. It is also + // needed to ensure correct operation when the application is being + // profiled. Calling cudaDeviceReset causes all profile data to be + // flushed before the application exits + cudaDeviceReset(); + return 0; +} diff --git a/pkgs/cudainfo/default.nix b/pkgs/cudainfo/default.nix new file mode 100644 index 0000000..871d697 --- /dev/null +++ b/pkgs/cudainfo/default.nix @@ -0,0 +1,43 @@ +{ + stdenv +, cudatoolkit +, cudaPackages +, autoAddDriverRunpath +, strace +}: + +stdenv.mkDerivation (finalAttrs: { + name = "cudainfo"; + src = ./.; + buildInputs = [ + cudatoolkit # Required for nvcc + cudaPackages.cuda_cudart.static # Required for -lcudart_static + autoAddDriverRunpath + ]; + installPhase = '' + mkdir -p $out/bin + cp -a cudainfo $out/bin + ''; + passthru.gpuCheck = stdenv.mkDerivation { + name = "cudainfo-test"; + requiredSystemFeatures = [ "cuda" ]; + dontBuild = true; + nativeCheckInputs = [ + finalAttrs.finalPackage # The cudainfo package from above + strace # When it fails, it will show the trace + ]; + dontUnpack = true; + doCheck = true; + checkPhase = '' + if ! 
cudainfo; then + set -x + cudainfo=$(command -v cudainfo) + ldd $cudainfo + readelf -d $cudainfo + strace -f $cudainfo + set +x + fi + ''; + installPhase = "touch $out"; + }; +}) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 7db9941..9d3c960 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -52,4 +52,5 @@ final: prev: prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; + cudainfo = prev.callPackage ./cudainfo/default.nix { }; } -- 2.49.0 From 3f3dc2d037346b994c61bfc8c5640b8acdfedf10 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 Jul 2025 13:40:33 +0200 Subject: [PATCH 410/472] Disable automatic August shutdown for Fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UPC has different dates for the yearly power cut, and Fox can recover properly from a power loss, so we don't need to have it turned off before the power cut. Simply disabling the timer is enough. Reviewed-by: Aleix Roca Nonell Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index fd5cdc8..436a85e 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -8,6 +8,10 @@ ../module/nvidia.nix ]; + # Don't turn off on August as UPC has different dates. + # Fox works fine on power cuts. 
+ systemd.timers.august-shutdown.enable = false; + # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103"; -- 2.49.0 From 142985c505f66d1820a357445cab3502d48059e3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 Jul 2025 13:42:57 +0200 Subject: [PATCH 411/472] Move August shutdown to 3rd at 22h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Roca Nonell Reviewed-by: Aleix Boné --- m/common/base/august-shutdown.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/m/common/base/august-shutdown.nix b/m/common/base/august-shutdown.nix index 624340c..7f79c84 100644 --- a/m/common/base/august-shutdown.nix +++ b/m/common/base/august-shutdown.nix @@ -1,12 +1,12 @@ { - # Shutdown all machines on August 2nd at 11:00 AM, so we can protect the + # Shutdown all machines on August 3rd at 22:00, so we can protect the # hardware from spurious electrical peaks on the yearly electrical cut for # manteinance that starts on August 4th. 
systemd.timers.august-shutdown = { - description = "Shutdown on August 2nd for maintenance"; + description = "Shutdown on August 3rd for maintenance"; wantedBy = [ "timers.target" ]; timerConfig = { - OnCalendar = "*-08-02 11:00:00"; + OnCalendar = "*-08-03 22:00:00"; RandomizedDelaySec = "10min"; Unit = "systemd-poweroff.service"; }; -- 2.49.0 From 39a211a846252f967759a9c071442cddd627e62b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 Jul 2025 14:07:06 +0200 Subject: [PATCH 412/472] Add NixOS module to control power policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Roca Nonell Reviewed-by: Aleix Boné --- m/module/power-policy.nix | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 m/module/power-policy.nix diff --git a/m/module/power-policy.nix b/m/module/power-policy.nix new file mode 100644 index 0000000..87c0414 --- /dev/null +++ b/m/module/power-policy.nix @@ -0,0 +1,31 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +let + cfg = config.power.policy; +in +{ + options = { + power.policy = mkOption { + type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]); + default = null; + description = "Set power policy to use via IPMI."; + }; + }; + + config = mkIf (cfg != null) { + systemd.services."power-policy" = { + description = "Set power policy to use via IPMI"; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}"; + Type = "oneshot"; + Restart = "on-failure"; + RestartSec = "5s"; + StartLimitBurst = "10"; + StartLimitIntervalSec = "10m"; + }; + }; + }; +} -- 2.49.0 From 343b4f155e60d7a36f8525b500ea8a859c7a8657 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 23 Jul 2025 15:25:47 +0200 Subject: [PATCH 413/472] Set power policy to always turn on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In all machines, as soon as we recover the power, turn the machine back on. We cannot rely on the previous state as we will shut them down before the power is cut to prevent damage on the power supply monitoring circuit. Reviewed-by: Aleix Roca Nonell Reviewed-by: Aleix Boné --- m/common/base.nix | 1 + m/common/base/always-power-on.nix | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 m/common/base/always-power-on.nix diff --git a/m/common/base.nix b/m/common/base.nix index bb0e4e2..cdd1dc1 100644 --- a/m/common/base.nix +++ b/m/common/base.nix @@ -3,6 +3,7 @@ # Includes the basic configuration for an Intel server. 
imports = [ ./base/agenix.nix + ./base/always-power-on.nix ./base/august-shutdown.nix ./base/boot.nix ./base/env.nix diff --git a/m/common/base/always-power-on.nix b/m/common/base/always-power-on.nix new file mode 100644 index 0000000..cdee12c --- /dev/null +++ b/m/common/base/always-power-on.nix @@ -0,0 +1,8 @@ +{ + imports = [ + ../../module/power-policy.nix + ]; + + # Turn on as soon as we have power + power.policy = "always-on"; +} -- 2.49.0 From d591721a6156207f70f361bbc2c322aa9247eed9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 24 Jul 2025 12:21:05 +0200 Subject: [PATCH 414/472] Move StartLimit* options to unit section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The StartLimitBurst and StartLimitIntervalSec options belong to the [Unit] section, otherwise they are ignored in [Service]: > Unknown key 'StartLimitIntervalSec' in section [Service], ignoring. When using [Unit], the limits are properly set: apex% systemctl show power-policy.service | grep StartLimit StartLimitIntervalUSec=10min StartLimitBurst=10 StartLimitAction=none Reviewed-by: Aleix Boné --- m/module/power-policy.nix | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/m/module/power-policy.nix b/m/module/power-policy.nix index 87c0414..61dcd01 100644 --- a/m/module/power-policy.nix +++ b/m/module/power-policy.nix @@ -18,13 +18,15 @@ in systemd.services."power-policy" = { description = "Set power policy to use via IPMI"; wantedBy = [ "multi-user.target" ]; + unitConfig = { + StartLimitBurst = "10"; + StartLimitIntervalSec = "10m"; + }; serviceConfig = { ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}"; Type = "oneshot"; Restart = "on-failure"; RestartSec = "5s"; - StartLimitBurst = "10"; - StartLimitIntervalSec = "10m"; }; }; }; -- 2.49.0 From f89bba35a689c49c2581627ba7249c7056c9e330 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 27 Aug 2025 15:20:34 +0200 Subject: [PATCH 415/472] 
Access gitlab via raccoon in fox Reviewed-by: Aleix Roca Nonell --- m/fox/configuration.nix | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 436a85e..614327e 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -35,6 +35,16 @@ services.openssh.settings.X11Forwarding = true; + # Use SSH tunnel to reach internal hosts + programs.ssh.extraConfig = '' + Host bscpm04.bsc.es gitlab-internal.bsc.es tent + ProxyJump raccoon + Host raccoon + ProxyJump apex + HostName 127.0.0.1 + Port 22022 + ''; + networking = { timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; -- 2.49.0 From ce258674213792dd2ad04174091968ae66a1327c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 27 Aug 2025 12:42:08 +0200 Subject: [PATCH 416/472] Add csiringo user with access to apex and weasel Reviewed-by: Rodrigo Arias Mallo --- m/common/base/users.nix | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index b6b2619..221050f 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -154,6 +154,20 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a" ]; }; + + csiringo = { + # Arbitrary UID but large so it doesn't collide with other users on ssfhead. 
+ uid = 9653; + isNormalUser = true; + home = "/home/Computational/csiringo"; + description = "Cesare Siringo"; + group = "Computational"; + hosts = [ "apex" "weasel" ]; + hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1"; + openssh.authorizedKeys.keys = [ + "sh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" + ]; + }; }; groups = { -- 2.49.0 From a0e4b209b0fb1e2a2afa938ddaac09480b7dd719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 16 Jul 2025 16:20:40 +0200 Subject: [PATCH 417/472] Enable nix-ld in weasel Reviewed-by: Rodrigo Arias Mallo --- m/weasel/configuration.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/weasel/configuration.nix b/m/weasel/configuration.nix index 8a92f9c..43ee735 100644 --- a/m/weasel/configuration.nix +++ b/m/weasel/configuration.nix @@ -14,6 +14,10 @@ # Users with sudo access users.groups.wheel.members = [ "abonerib" "anavarro" ]; + # Run julia installed with juliaup using julia's own libraries: + # NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia + programs.nix-ld.enable = true; + networking = { hostName = "weasel"; interfaces.eno1.ipv4.addresses = [ { -- 2.49.0 From 5ee8623af28d938050aa5d782b82e9ec5e824d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 27 Aug 2025 17:21:23 +0200 Subject: [PATCH 418/472] Fix typo in csiringo ssh key Reviewed-by: Rodrigo Arias Mallo --- m/common/base/users.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 221050f..8aace8d 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -165,7 +165,7 @@ hosts = [ "apex" "weasel" ]; hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1"; openssh.authorizedKeys.keys = [ - "sh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" ]; }; }; -- 2.49.0 From ee895d2e4f7be2c654b7cac6996b8154d9add3b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 27 Aug 2025 11:56:20 +0200 Subject: [PATCH 419/472] Move slurm control server to apex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 2 ++ m/hut/configuration.nix | 1 - m/module/slurm-client.nix | 2 +- m/{hut => module}/slurm-server.nix | 0 4 files changed, 3 insertions(+), 2 deletions(-) rename m/{hut => module}/slurm-server.nix (100%) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 540e507..a18bbfc 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -5,6 +5,8 @@ ../common/xeon.nix ../common/ssf/hosts.nix ../module/ceph.nix + ../module/slurm-client.nix + ../module/slurm-server.nix ./nfs.nix ]; diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 493208c..d301b92 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -11,7 +11,6 @@ ./gitlab-runner.nix ./monitoring.nix ./nfs.nix - ./slurm-server.nix ./nix-serve.nix ./public-inbox.nix ./gitea.nix diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 21ae945..02b5208 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -39,7 +39,7 @@ in { services.slurm = { client.enable = true; - controlMachine = "hut"; + controlMachine = "apex"; clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" diff --git a/m/hut/slurm-server.nix b/m/module/slurm-server.nix similarity index 100% rename from m/hut/slurm-server.nix rename to m/module/slurm-server.nix -- 2.49.0 From f569933732117c59d67bc0e00b73a258e3f57cd9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 
27 Aug 2025 12:36:52 +0200 Subject: [PATCH 420/472] Split slurm configuration for client and server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 120 ++------------------------------------ m/module/slurm-common.nix | 116 ++++++++++++++++++++++++++++++++++++ m/module/slurm-server.nix | 8 ++- 3 files changed, 127 insertions(+), 117 deletions(-) create mode 100644 m/module/slurm-common.nix diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 02b5208..84ba4c7 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -1,33 +1,10 @@ -{ config, pkgs, lib, ... }: +{ lib, ... }: -let - suspendProgram = pkgs.writeScript "suspend.sh" '' - #!/usr/bin/env bash - exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log - set -x - export "PATH=/run/current-system/sw/bin:$PATH" - echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log - hosts=$(scontrol show hostnames $1) - for host in $hosts; do - echo Shutting down host: $host - ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off - done - ''; +{ + imports = [ + ./slurm-common.nix + ]; - resumeProgram = pkgs.writeScript "resume.sh" '' - #!/usr/bin/env bash - exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log - set -x - export "PATH=/run/current-system/sw/bin:$PATH" - echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log - hosts=$(scontrol show hostnames $1) - for host in $hosts; do - echo Starting host: $host - ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on - done - ''; - -in { systemd.services.slurmd.serviceConfig = { # Kill all processes in the control group on stop/restart. 
This will kill # all the jobs running, so ensure that we only upgrade when the nodes are @@ -37,90 +14,5 @@ in { KillMode = lib.mkForce "control-group"; }; - services.slurm = { - client.enable = true; - controlMachine = "apex"; - clusterName = "jungle"; - nodeName = [ - "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" - "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" - ]; - - partitionName = [ - "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" - ]; - - # See slurm.conf(5) for more details about these options. - extraConfig = '' - # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but - # not with Intel MPI. For that use the compatibility shim libpmi.so - # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx - # library in SLURM (--mpi=pmix). See more details here: - # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16 - MpiDefault=pmix - - # When a node reboots return that node to the slurm queue as soon as it - # becomes operative again. - ReturnToService=2 - - # Track all processes by using a cgroup - ProctrackType=proctrack/cgroup - - # Enable task/affinity to allow the jobs to run in a specified subset of - # the resources. Use the task/cgroup plugin to enable process containment. - TaskPlugin=task/affinity,task/cgroup - - # Power off unused nodes until they are requested - SuspendProgram=${suspendProgram} - SuspendTimeout=60 - ResumeProgram=${resumeProgram} - ResumeTimeout=300 - SuspendExcNodes=hut - - # Turn the nodes off after 1 hour of inactivity - SuspendTime=3600 - - # Reduce port range so we can allow only this range in the firewall - SrunPortRange=60000-61000 - - # Use cores as consumable resources. In SLURM terms, a core may have - # multiple hardware threads (or CPUs). - SelectType=select/cons_tres - - # Ignore memory constraints and only use unused cores to share a node with - # other jobs. 
- SelectTypeParameters=CR_Core - - # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html - # This sets up the "extern" step into which ssh-launched processes will be - # adopted. Alloc runs the prolog at job allocation (salloc) rather than - # when a task runs (srun) so we can ssh early. - PrologFlags=Alloc,Contain,X11 - - # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes - # adopted by the external step, similar to tasks running in regular steps - # LaunchParameters=ulimit_pam_adopt - SlurmdDebug=debug5 - #DebugFlags=Protocol,Cgroup - ''; - - extraCgroupConfig = '' - CgroupPlugin=cgroup/v2 - #ConstrainCores=yes - ''; - }; - - # Place the slurm config in /etc as this will be required by PAM - environment.etc.slurm.source = config.services.slurm.etcSlurm; - - age.secrets.mungeKey = { - file = ../../secrets/munge-key.age; - owner = "munge"; - group = "munge"; - }; - - services.munge = { - enable = true; - password = config.age.secrets.mungeKey.path; - }; + services.slurm.client.enable = true; } diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix new file mode 100644 index 0000000..6af746f --- /dev/null +++ b/m/module/slurm-common.nix @@ -0,0 +1,116 @@ +{ config, pkgs, ... 
}: + +let + suspendProgram = pkgs.writeScript "suspend.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Shutting down host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off + done + ''; + + resumeProgram = pkgs.writeScript "resume.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Starting host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on + done + ''; + +in { + services.slurm = { + controlMachine = "apex"; + clusterName = "jungle"; + nodeName = [ + "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" + "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" + ]; + + partitionName = [ + "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + ]; + + # See slurm.conf(5) for more details about these options. + extraConfig = '' + # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but + # not with Intel MPI. For that use the compatibility shim libpmi.so + # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx + # library in SLURM (--mpi=pmix). See more details here: + # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16 + MpiDefault=pmix + + # When a node reboots return that node to the slurm queue as soon as it + # becomes operative again. + ReturnToService=2 + + # Track all processes by using a cgroup + ProctrackType=proctrack/cgroup + + # Enable task/affinity to allow the jobs to run in a specified subset of + # the resources. 
Use the task/cgroup plugin to enable process containment. + TaskPlugin=task/affinity,task/cgroup + + # Power off unused nodes until they are requested + SuspendProgram=${suspendProgram} + SuspendTimeout=60 + ResumeProgram=${resumeProgram} + ResumeTimeout=300 + SuspendExcNodes=hut + + # Turn the nodes off after 1 hour of inactivity + SuspendTime=3600 + + # Reduce port range so we can allow only this range in the firewall + SrunPortRange=60000-61000 + + # Use cores as consumable resources. In SLURM terms, a core may have + # multiple hardware threads (or CPUs). + SelectType=select/cons_tres + + # Ignore memory constraints and only use unused cores to share a node with + # other jobs. + SelectTypeParameters=CR_Core + + # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html + # This sets up the "extern" step into which ssh-launched processes will be + # adopted. Alloc runs the prolog at job allocation (salloc) rather than + # when a task runs (srun) so we can ssh early. + PrologFlags=Alloc,Contain,X11 + + # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes + # adopted by the external step, similar to tasks running in regular steps + # LaunchParameters=ulimit_pam_adopt + SlurmdDebug=debug5 + #DebugFlags=Protocol,Cgroup + ''; + + extraCgroupConfig = '' + CgroupPlugin=cgroup/v2 + #ConstrainCores=yes + ''; + }; + + # Place the slurm config in /etc as this will be required by PAM + environment.etc.slurm.source = config.services.slurm.etcSlurm; + + age.secrets.mungeKey = { + file = ../../secrets/munge-key.age; + owner = "munge"; + group = "munge"; + }; + + services.munge = { + enable = true; + password = config.age.secrets.mungeKey.path; + }; +} diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix index e7fab8b..f84085d 100644 --- a/m/module/slurm-server.nix +++ b/m/module/slurm-server.nix @@ -1,7 +1,9 @@ { ... 
}: { - services.slurm = { - server.enable = true; - }; + imports = [ + ./slurm-common.nix + ]; + + services.slurm.server.enable = true; } -- 2.49.0 From f7dff9deabd6c2e6a6c7e4fd7660a37bb3e0e619 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 27 Aug 2025 12:37:21 +0200 Subject: [PATCH 421/472] Only configure apex as slurm server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index a18bbfc..9b4f0f4 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -5,7 +5,6 @@ ../common/xeon.nix ../common/ssf/hosts.nix ../module/ceph.nix - ../module/slurm-client.nix ../module/slurm-server.nix ./nfs.nix ]; -- 2.49.0 From 20b48053355e86d161b0233c507366fd943ed869 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 27 Aug 2025 12:43:12 +0200 Subject: [PATCH 422/472] Remove hut from slurm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/configuration.nix | 1 - m/module/slurm-common.nix | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index d301b92..9e8c1a2 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -7,7 +7,6 @@ ../module/ceph.nix ../module/debuginfod.nix ../module/emulation.nix - ../module/slurm-client.nix ./gitlab-runner.nix ./monitoring.nix ./nfs.nix diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix index 6af746f..e65d9cc 100644 --- a/m/module/slurm-common.nix +++ b/m/module/slurm-common.nix @@ -33,7 +33,6 @@ in { clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" - "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; partitionName = [ @@ -65,7 +64,7 @@ in { SuspendTimeout=60 ResumeProgram=${resumeProgram} 
ResumeTimeout=300 - SuspendExcNodes=hut + #SuspendExcNodes= # Turn the nodes off after 1 hour of inactivity SuspendTime=3600 -- 2.49.0 From 74130214403c7ec78317584a0c5018e8fae07a66 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 27 Aug 2025 12:59:21 +0200 Subject: [PATCH 423/472] Add firewall rules to slurm server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/slurm-server.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix index f84085d..6536136 100644 --- a/m/module/slurm-server.nix +++ b/m/module/slurm-server.nix @@ -6,4 +6,13 @@ ]; services.slurm.server.enable = true; + + networking.firewall = { + extraCommands = '' + # Accept slurm connections to controller from compute nodes + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept + # Accept slurm connections from compute nodes for srun + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept + ''; + }; } -- 2.49.0 From 200c727bbf568bd04de213a729eb651b564e5dab Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 12:02:12 +0200 Subject: [PATCH 424/472] Use writeShellScript for suspend.sh and resume.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/module/slurm-common.nix | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix index e65d9cc..2303830 100644 --- a/m/module/slurm-common.nix +++ b/m/module/slurm-common.nix @@ -1,8 +1,7 @@ { config, pkgs, ... 
}: let - suspendProgram = pkgs.writeScript "suspend.sh" '' - #!/usr/bin/env bash + suspendProgram = pkgs.writeShellScript "suspend.sh" '' exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log set -x export "PATH=/run/current-system/sw/bin:$PATH" @@ -14,8 +13,7 @@ let done ''; - resumeProgram = pkgs.writeScript "resume.sh" '' - #!/usr/bin/env bash + resumeProgram = pkgs.writeShellScript "resume.sh" '' exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log set -x export "PATH=/run/current-system/sw/bin:$PATH" -- 2.49.0 From e415f70bbb8a0a97b682ba631770f681d4d235b8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 13:38:47 +0200 Subject: [PATCH 425/472] Add wireguard server in fox Reviewed-by: Aleix Roca Nonell --- keys.nix | 1 + m/fox/configuration.nix | 1 + m/fox/wireguard.nix | 35 +++++++++++++++++++++++++++++++++++ secrets/secrets.nix | 3 +++ secrets/wg-fox.age | Bin 0 -> 697 bytes 5 files changed, 40 insertions(+) create mode 100644 m/fox/wireguard.nix create mode 100644 secrets/wg-fox.age diff --git a/keys.nix b/keys.nix index 6fbb78a..75f1c38 100644 --- a/keys.nix +++ b/keys.nix @@ -31,6 +31,7 @@ rec { admins = { "rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; "rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent"; + "rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox"; root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; }; } diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 614327e..ab82949 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -6,6 +6,7 @@ ../common/xeon/console.nix ../module/emulation.nix ../module/nvidia.nix + ./wireguard.nix ]; # Don't turn off on August as UPC has different dates. 
diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix new file mode 100644 index 0000000..34d84c0 --- /dev/null +++ b/m/fox/wireguard.nix @@ -0,0 +1,35 @@ +{ config, ... }: + +{ + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgFox.file = ../../secrets/wg-fox.age; + + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + # "wg0" is the network interface name. You can name the interface arbitrarily. + wg0 = { + # Determines the IP address and subnet of the server's end of the tunnel interface. + ips = [ "10.100.0.1/24" ]; + + # The port that WireGuard listens to. Must be accessible by the client. + listenPort = 666; + + # Path to the private key file. + privateKeyFile = config.age.secrets.wgFox.path; + # Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y= + + peers = [ + # List of allowed peers. + { + name = "Apex"; + publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; + # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. 
+ allowedIPs = [ "10.100.0.30/32" ]; + } + ]; + }; + }; +} diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 776e73f..013fa23 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -2,6 +2,7 @@ let keys = import ../keys.nix; adminsKeys = builtins.attrValues keys.admins; hut = [ keys.hosts.hut ] ++ adminsKeys; + fox = [ keys.hosts.fox ] ++ adminsKeys; mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; tent = [ keys.hosts.tent ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins @@ -24,4 +25,6 @@ in "ceph-user.age".publicKeys = safe; "munge-key.age".publicKeys = safe; + + "wg-fox.age".publicKeys = fox; } diff --git a/secrets/wg-fox.age b/secrets/wg-fox.age new file mode 100644 index 0000000000000000000000000000000000000000..187ddfedbaf33546761b07332500c774f0ab938e GIT binary patch literal 697 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCSnc5zBfa#Zke%SkOr zsxmZi&(8|2NRLSIO*i!ScS`d&2sJP$P7BY?Hx8|;3J=Q8%;&N&PS0}n@Y0Sn_V73H z@hT0@^(gT)@Hg_!DhPAWP1er~$u=vlO7{yWw!pB>+cevwT*1`UC(5)q%%t4U&)uic z&?3`0Bh{nAyuc^VFf7F=-#@@T(aqhXBs9^(k;_uMpui}-qRiFJH8IL1#Ldmy-!H4o zF*qdC*(p0SAj>__*)=sev)nb;6Wumv$8dL(Km})miVUytOoMPAW3OQU+{|1L|D{q?6ax#x2m|k; zs61Co*Mbt`yz;c7P;}cW9L+54R$k5Nms~s3~&l|PI7g3&I$@I%6E^5NDIj<^2>EHO?P(7cXx5mbPS6O zFiA^u_2lw3$#5?Zud1{NaVs(Q49iUmNOyFN2z7Gt3QpG#F7UI=D~>G6bge2#F6YwK z)m1PKEzYU(2re-Kyfld zes2ExwRP8%*WaF6ViW5vU!`|%lgP6z*RJHpv=(%{sGlH|$Z^y2<0q>H`CHEXx81ng cH(?cPNqV>9!MY~bTN+ Date: Fri, 29 Aug 2025 13:52:05 +0200 Subject: [PATCH 426/472] Enable wireguard in apex Reviewed-by: Aleix Roca Nonell --- m/apex/configuration.nix | 1 + m/apex/wireguard.nix | 31 +++++++++++++++++++++++++++++++ secrets/secrets.nix | 2 ++ secrets/wg-apex.age | 13 +++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 m/apex/wireguard.nix create mode 100644 secrets/wg-apex.age diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 9b4f0f4..17828a3 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -7,6 +7,7 
@@ ../module/ceph.nix ../module/slurm-server.nix ./nfs.nix + ./wireguard.nix ]; # Don't install grub MBR for now diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix new file mode 100644 index 0000000..a0636be --- /dev/null +++ b/m/apex/wireguard.nix @@ -0,0 +1,31 @@ +{ config, ... }: + +{ + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgApex.file = ../../secrets/wg-apex.age; + + # Enable WireGuard + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + # "wg0" is the network interface name. You can name the interface arbitrarily. + wg0 = { + ips = [ "10.100.0.30/24" ]; + listenPort = 666; + privateKeyFile = config.age.secrets.wgApex.path; + # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA= + peers = [ + { + name = "Fox"; + publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; + allowedIPs = [ "10.100.0.0/24" ]; + endpoint = "fox.ac.upc.edu:666"; + # Send keepalives every 25 seconds. Important to keep NAT tables alive. 
+ persistentKeepalive = 25; + } + ]; + }; + }; +} diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 013fa23..9673249 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -3,6 +3,7 @@ let adminsKeys = builtins.attrValues keys.admins; hut = [ keys.hosts.hut ] ++ adminsKeys; fox = [ keys.hosts.fox ] ++ adminsKeys; + apex = [ keys.hosts.apex ] ++ adminsKeys; mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; tent = [ keys.hosts.tent ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins @@ -27,4 +28,5 @@ in "munge-key.age".publicKeys = safe; "wg-fox.age".publicKeys = fox; + "wg-apex.age".publicKeys = apex; } diff --git a/secrets/wg-apex.age b/secrets/wg-apex.age new file mode 100644 index 0000000..d61c88c --- /dev/null +++ b/secrets/wg-apex.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 fw2Xhg CO3xkQZ+Tzej6VOQg8GoGYMdRWP0s0w55S7PeZ/zq0o +P119jX3o3ZdD+Dg/ehVIEsWdSCbmYksLDt4k4dovgl8 +-> ssh-ed25519 cK5kHw B9d+pwXxt8jDV+6WvHOxUQuwpYF4mHUoGPQaD7niuX4 +RxYK+0r28qTULJKaZiSQC5yqsi/BYTZ40TSIhpwxCk0 +-> ssh-ed25519 CAWG4Q akKCzdpc/17zOdLknTawmpJw3GuTLXjDlZz8CQg3hGE +011OH3AnEmm36xg0p7FvHOL1xVpMllUf6lqs4BSUVeA +-> ssh-ed25519 xA739A hv9vYtwBhrospLhJ5SQoNDkZx+blfYKZ/I40frj/u1Y +s6KY1l2d6dEQouLbwL1Y56kC0Up9Tp1dmW91LYNYKjs +-> ssh-ed25519 MSF3dg ehzqonjVpx3NFaxmSQc66VxkZEU5n7aiIOvuwYrl7wI +yME0q4b5F9/eKJlNg+0sGuGTGp58UoNAf+MI8pj2faM +--- KC628nslyddv5xZwh4DUj1nGqMx/mW+txSnYCjFRODY +dl[b^َ-l+ТFfuڑnHLK ^12Vj[ \ No newline at end of file -- 2.49.0 From 46d03d5ca72a087df44409f38f6a6036a693f490 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:11:19 +0200 Subject: [PATCH 427/472] Add local host fox in apex Reviewed-by: Aleix Roca Nonell --- m/apex/wireguard.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix index a0636be..bd5b351 100644 --- a/m/apex/wireguard.nix +++ b/m/apex/wireguard.nix @@ -28,4 +28,8 @@ ]; }; }; + + networking.hosts = { + "10.100.0.1" = [ 
"fox" ]; + }; } -- 2.49.0 From 6bbfb0d1244b79728a2a4e6f6f4b0cb4bf2a6475 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:29:14 +0200 Subject: [PATCH 428/472] Make apex host specific to each machine Allows direct contact via the VPN when accessing from fox, but use Internet when using the rest of the machines. Reviewed-by: Aleix Roca Nonell --- m/common/base/net.nix | 2 +- m/fox/wireguard.nix | 4 ++++ m/raccoon/configuration.nix | 1 + m/tent/configuration.nix | 3 +++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/m/common/base/net.nix b/m/common/base/net.nix index 3a64c78..64e6160 100644 --- a/m/common/base/net.nix +++ b/m/common/base/net.nix @@ -14,7 +14,7 @@ nftables.enable = lib.mkForce false; hosts = { - "84.88.53.236" = [ "apex" "ssfhead.bsc.es" "ssfhead" ]; + "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; "84.88.51.152" = [ "raccoon" ]; "84.88.51.142" = [ "raccoon-ipmi" ]; }; diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index 34d84c0..a16f152 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -32,4 +32,8 @@ ]; }; }; + + networking.hosts = { + "10.100.0.30" = [ "apex" ]; + }; } diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 46f63f5..cec17e6 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -39,6 +39,7 @@ }; hosts = { "10.0.44.4" = [ "tent" ]; + "84.88.53.236" = [ "apex" ]; }; }; diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 57ac6e9..416d8df 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -33,6 +33,9 @@ nameservers = [ "84.88.52.35" "84.88.52.36" ]; search = [ "bsc.es" "ac.upc.edu" ]; defaultGateway = "10.0.44.1"; + hosts = { + "84.88.53.236" = [ "apex" ]; + }; }; services.p.enable = true; -- 2.49.0 From c5d3b8e7f00c2a82a1dfc9bcfd5af534248ccbc0 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:35:51 +0200 Subject: [PATCH 429/472] Trust fox for compute node secrets 
Reviewed-by: Aleix Roca Nonell --- keys.nix | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/keys.nix b/keys.nix index 75f1c38..b26f11a 100644 --- a/keys.nix +++ b/keys.nix @@ -16,8 +16,7 @@ rec { }; hostGroup = with hosts; rec { - untrusted = [ fox ]; - compute = [ owl1 owl2 ]; + compute = [ owl1 owl2 fox ]; playground = [ eudy koro weasel ]; storage = [ bay lake2 ]; monitor = [ hut ]; -- 2.49.0 From 20e7d244d1ca9bc8d212998364eb5800427d5e34 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:39:28 +0200 Subject: [PATCH 430/472] Rekey secrets with trusted fox key Reviewed-by: Aleix Roca Nonell --- secrets/ceph-user.age | Bin 1133 -> 1353 bytes secrets/gitea-runner-token.age | 22 ++++++++-------- secrets/gitlab-bsc-docker-token.age | Bin 629 -> 739 bytes secrets/gitlab-runner-docker-token.age | Bin 626 -> 736 bytes secrets/gitlab-runner-shell-token.age | Bin 626 -> 736 bytes secrets/ipmi.yml.age | Bin 1563 -> 1673 bytes secrets/jungle-robot-password.age | Bin 697 -> 807 bytes secrets/munge-key.age | Bin 2116 -> 2336 bytes secrets/nix-serve.age | Bin 755 -> 865 bytes .../tent-gitlab-runner-bsc-docker-token.age | 22 ++++++++-------- .../tent-gitlab-runner-pm-docker-token.age | 22 ++++++++-------- secrets/tent-gitlab-runner-pm-shell-token.age | 23 +++++++++-------- secrets/vpn-dac-client-key.age | Bin 2246 -> 2356 bytes secrets/vpn-dac-login.age | 24 ++++++++++-------- secrets/wg-apex.age | Bin 697 -> 697 bytes secrets/wg-fox.age | Bin 697 -> 697 bytes 16 files changed, 61 insertions(+), 52 deletions(-) diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age index 951722d4439da7d74124c3d7b57c5e9d18960aab..48b912cd6db3dc3a7a65390c1c604f982ff5731b 100644 GIT binary patch literal 1353 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCT4jI^loE?3Y`&&_c2 zcPsQSw(!hPa`enC3k>s4^)-z&%QkQ;C=D_*)ORj5O(_Zs^W;jmbc^sxH_G?S&rHoT zsPZy5a0~Y+4-WCQ2zDw@DhM{Tun0B|F3&P|^+mU>INdQZ*HIzAARsFzJRl=D)6*a* 
z#Wcq}JH^PrB-qp~!p$c+FgY--q|!Ud+}9;HFOm@2I5GBwuqgXXBFmq=5WNXE&Fi z;G-vZ-lW_lh&-4`kpkyv@)9mEzPy_FbK(`QAv*65ZuLw76qJEpv*z^uto~bG&^j(yCIu9Mg=_Je`aRQatnY z%}TQi{j$+*D@iWR4G2^)b_vLK3GfUI3=S)a2==M+3~~)A_N%%F6Q(F)S-fFErM6O!oKo4t6*7%g-zFHgzp@4of!hEzd`{ z&CAQsCp=xjC_FVS**w)JKgT!J)HBaD*V!+m(8$m?&D=28(;(5v&(I^?r6|XvtkQ(5 zFv2a|(mBv6uc)dtDas(XD%ij?-zeNYsHoiCDLXecGB3p`BsnTiKMf<5B)d2zCOInP zXL=Z$Ip%wo=%=~4MW*NFSCn{|`{$>Y1ZW%SmuHpaSrq0Q2YI;!dn9uedgo?WR2n8m zxd)g;l{*_`dYI>#XJmRAXcxH!MwVp-CYkyhCK^T=6q%s=E!o>N+oN0|DZ)6Y#NRZy z#5Xl7G&0;V(b3pEEz85vG|%5WqS&x3+$AvExY9qX*eIXNHN7G?InBb&+c8Dk)ilG` zCpRO^urjdNIp4I%t1>jPD$TIC%D=22#1X@9&W_>kCV>k1-i3uhE?yo6 z*#_kn0sh)mrs-~exxtC%d+%0Ou@)+zQJzBDd`HPrm5L}$pzY(c}bxu+U|k+VNvEz`A&|(iCz|wE`@I9 zCeC3Im4*SK6@grdAs#+Hj!9L40pZ~}Df<2)#!lX;Ny%nz$+^Xaz9EU>&H+yDxrU)0 znaNzby1ELvVcCwpd7hRLff>QZX}+0mrKJ%8`jtLGmgTPQmfAr@76pzbp~(h`WszJg z&s{@~ delta 1028 zcmX@f^_F9TPQ6Q6l7WxEv1M?ErB`58K#4_2L6}oe?QAmoXwn0FCX;7MZj)|*LiCa>bUs0Knws}yJk)=tpiAPF+ zMO0*@n{lNHSAMahpGl5cSfRVAOR`~Jrg?BdiHCQkOL(GVj)$p()3r#E1^ertWpJf!UuZRjZ&DXB<^>+1g3@kA+%63cj zayL%b_X^E*a?H)iGpNdma`ef}NplV5a&^w~aC9v4i1IQDFDc70HVpJM^D!=s$Tc_e z^Gy!Q_pCClG;=O3$uRXrj{z?)L!a<;g$!rse6ygy98;(AsB({}ET{UsoYcZFr_wC< zKo7Hkauf5aD)V%6kKD}ce6Aw>z_4VaB*#kcB$HxqKbOMfh>*Oz3@-!kkYcyuz~Zz5 zH)D6>oMQL#bad;S9mCyC0u{ow-NK90y@CsilZ_qSvyEN-N|W45oGi>D4Bb+~T%7Ww z>Z{6pQX>tE9n-mTJt_=?gH1iW3-t3G9gDrp3QLVcL(4Vg^6&`_GV(AfD%CCwFv^Z_vIr>f%PYxrF)xpF%C0n+oX8?x zU#4GD=gt;BS(XuITH;id>K~MA7NYN*mu!$z<`L*0nVMm&o$8y;rK_u};8T=b*EYFv>X= ssh-ed25519 HY2yRg d7+nvfAcdC3GjJxipXFrsfGGyP5jAY+gRWRV+4FVYAM -CG7r0bRGgnUWcdfDnpe7HwZ3L/y7b5iuJuqvf15b3/Y --> ssh-ed25519 CAWG4Q X0vITOErz4wkR3VQYOcVlnrkHtwe+ytdZz1Hcrs4vVs -6IWYOhXLQ+BnML9YfLLHJYEO2CZ/uEc9IBqhoWvjDHI --> ssh-ed25519 xA739A p5e/0AJtZ0+zbRvkB/usLuxusY8xXRx9Ksi/LQlcIHw -M4S/qlzT9POyJx4gY9lmycstUcdwG2cinN4OlV22zzo --> ssh-ed25519 MSF3dg Ydl7uBWzBx6sAaxbzC3x8qiaU3ysGqV4rUFLpHCEV30 -/1AUHBhCNOs9i7LJbmzwQDHsu+ybzYf6+coztKk5E3U ---- kYt15WxClpT7PXD1oFe9GqJU+OswjH7y9wIc8/GzZ7M -hߓ`V4F_k)^m$uj:ѳ}Z]$U]u 
0v8?XPg%d#d9{rAi \ No newline at end of file +-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik +DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI +-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio +hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU +-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw +hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs +-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY +eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM +-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg +/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg +--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs +YM:E O2r=&4CQΣhCcb^Sy% x-vC`gW^wVG \ No newline at end of file diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age index 985097b038e77c9122ea636bcbdafaa1ac7dad1d..2b77fcf03d55cbefb941db0c5aa74b2363f59f58 100644 GIT binary patch delta 669 zcmey$@|bmkPJLxbfnij3u$OyIaiL>gxVcwVm4R7!YErp@x1Vd7TewS+qiINGnSW5a zCs$TtzHxqKXjV`}X;NxHNU(unRe5r#fse6QxMfgUWOiV>Q9zbiS+Z|QF_*5LLUD11 zZfc5=si~o*LbA7Mwnw=_fR}HIWnyr0iMwN=vr&eJQGHl&R-!?0u5U_eWv+LjbD~pr zMRH(enon3Hmy2nspHsGRS!GUsXkoT{xQAO(fJKsld2)(LKw3#+o?A$9v5!SwXlZ^r zx^>Qu;qE4Z3c-es6~@}7RjC%`{$5FDRi!2smWJj&rbQMN&QZo?*=2dAt`*7dd8V!* z*<1$cjuwu|29?gCA?}GTJ{2XFWl4@HDPdv0{!SqtB|hd}B~j*?Degvrp%XufhwG>1 zxR-~U6&9KLRGFpw8fO(p1vn-ZrRx_P7lr#-q$Q<#l_&X{Xjg<8a8mpjr}b1;l(ioHsvv&lX7hWi z*Gx5A?>$pEG5t~+`>*%Pd%@=Z;o;t% z=`P8JPG$O;9*M;UF6N#dxk*K(re2l$o|PtvNukA_r6DfP7TV#-T-km}uKLbS>FGha z>7m)i{;A~!B|*mde);+(`lbGXrD+y0NE=&r| zuFQ5VFiG`E=hD^HRjAO;tjJ6WPt7WG_YL=}GApb~&2bB;(zY!2GBgb_&(BY*s5f@E z^f!!(a^;%L_aV$JL)X^n+#K^mjAD2<-h%A%EZ5VL!JIJ{)>B?lq%ET hmkRHc+5f>$**xL4^RctP>Jd003X=#)AL= diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age index c481b59b138c092201c280c42e93d28a25bf46de..e7f58c7f5cc7265edd0954dcfcc473060c0dc0ee 100644 GIT binary patch delta 665 zcmeyw@_==MPJO76cVKu~W|U``UutfyWvWGnxrcVDe^q{@UqwW^k5901MsbvnuVblu 
zHkVnRX^MxTZ?a!iwsBE$a)3#hQ9)r;q)T{7sGDz!p=nW6v1^KcZlR}}FPE;JLUD11 zZfc5=si~o*LbA7Mwnw>wYf@2`esV#TVX|AeW29r4WxanzdS#`Dg{i-*Z)jdxsArg& zS%G<0V!m4>mzkG&MtGsWOG&VPnx~tIQ$a;WMN&~kRH12EQhrWZl22ifcVS{xh=CD? zbxe6LjN6@ESyA$di)fgYyWsoF+~rh(4M zT)Bm20V%<5+TQ+!B^jj=V z7MGW%7P`0=fFsw33_RlOxtjsW|ip(xcEOrfZEpsnQP0UJ7 zchyfRbP6-CO)_&ePjt&L;M&~OystiJn!|Pd%xRwLmwxv8Rm>_*$MPVjf8Ho`Q*@-4mS>A?W zhAAN?UdFC&IfjOYiN0m!W?pWkRgvx~zJA8#+S$dE;~B-ngM33${3?wD%k&G443jF; z45~6*lasv?3v$v*JktFu%#F;9%2NDIj4HCZ3QPjBB8rTP1AI;M{oE_H3!=P2tBgx6 zN?i;C@(oi`&BDXN4cr5K@{6-4pJf!UH}p(5b<_?BDDiMfOHa!+iAoPI3raWh4M@#T zcFjr5E+{rn@p8^L4iB{8GRV)+E{gOpP0lg)GDyrU49>{(FUXDZ3r{RD@$oiwN%YJP zG0TfAt@J78($&>f$W3>NaxBeE%XRZkE-(u-i8873Pj&Lh$uJKwa}6wUHn()DElLY< zEhsli=XzNvckXEAZ^icdqIZFzfwxXr$(`kBObmR-veu_O?1y{rb%m%`di?k6r*|Jp z`}zM`vZLNDewS;T!`5oIT(LQOv~K&u_l<&b7J93$O_B&LyY1)vnQ>0Rst)s&M+8nv ci*M6euz2tHhl>RmxL#D+?=e`Qdq%Jc09TI0xc~qF diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age index 8ecc7e23301a3be68696fcb1dfd8212123fed120..0290f9a75e16f83b17a15c13d4627b08dc84fbee 100644 GIT binary patch delta 665 zcmeyw@_==MPQ72GyMJ(MRGvkiyH{voL3qBuqoZYnzeRv~YI1g{lXrx3u|;`6v37Y* zC|ABuPI_XLSC~(jrDv{ThC!K!t9L+{V?l9-iCJQqMShuYx?jGVi+4_{374*&LUD11 zZfc5=si~o*LbA7Mwnw=_lCx!KPGCf8fT>Tpr<lRGEoUaZ0*#VQ`v@nUP5( zx^>Qu;qE4Z3gOva=~<~M&Q5tInI=j4VQ!_Nmf?j)hGE$r6`7^w#W{(l+6DoonZcPZ zCS3Y?d1;kKE+G|x+1Y8%g;k!3+Bp^Jg?VPap6PBmex~VB9+8ob1*rjL#S=e@hg+s6 z=b2@hCz}^J2bmf;rj}%uI%hlP`4$Je`j!NiC}7v_XyJBDWzbLB)7`R8dTr)ESI z1Z8^$RRwz)rka^}7dsb*rx}-cCz?1qB}FWt#a}%()K7X za5hY;NG&ri)HX{Fj>`A-b@oc}N-K5r%ke1B=PC@VEHQ~JsB%v$^NTR^EG&;GHp>rk zG))Xn&rET1t8jHRGIFtq2=H=mQTSBa(Pj@G=66FchqtNPl}^Fs2m2PcNklDjPS?)<** qbN5Ymy)gat!6pmi$6rcY3&Au4i$&f1+uXU#YXRe_mpG zF;|IcVwzF0N2p6sp?;)`r)OS%o=ItlsYP<4X;ffXs#jUAS)^r7_~fZcZ-RNx|+u=Dw*G1x~@iQNd{iMaEH{837T+Tme-U;bDel7Up?o zUWUdIhQ3LL#g2uRRS_A<89_m10dC<2?qMc=i3X;Dlj9l1!=q9w(vvLGohkxSjZ88# zvJ8_;4Z|%B68)0XiyR|-90R?BECZYzoh?E!*h~dg8fr6BAv4CpkE~GP5u^Co(X=I5o(l$jvp?E4Lye 
zn=3ifGR!$W$-uJ0UAxrG*CV$q!^73Y+1=GQ%%~_kyEre@B0V%RJ3G%KluOr6p}06h zH#Nn`)YQ;Y!QIp+!n9l=Bd;(#$Gy1B#VEtb%{#a-x8Aid*EiAMC^I7{(AD23#M{Uu zvcT2dy&}kh%Pl!HB+@+4qAJ+TIk70rAl*OBQ{U3NEZ4Lwv!Ki(+rY%xKPRBnGbgYZ z-MVCN(`=7&g`5bp@SmYqO@{Lmwf+VzeKnET>Ws%^obwE!vn(1 zwH<>i%>4^|^}Q3rwZo0mN{b32i%jw=!UHSA$}`MU4XZrDlAIzexB|nSbBuffGTil( zO9H%Iot!;Pv{Nj~iZhBGjfzV03$ueWjl(^H{k)S6CdV_1hZ#qjMEa(gl$HjBZ@7v&fPn}nMCxQDnWRTP8;a%HDG2c-BI73FICxda#{xd!KW7$ z8I@UXoM+%$S>+ZPo>`gCmF(u5m7<+l8S3X&X_{r=t{-BS=BVu!VCj-??rCA>W}fJj z=$cs(8Bm$erK_u}kYeI)9%Wt`U{K+#ZBQ6goa|qi8s?RfSXGp%ALVBm;gsZ4Z&BiA z?Cas`%XM;{+ad4fZ6_HwZu|e^k=~W$)V~c**MFZ8J~;crA(a%PR~C!knAj{y_ftE= z<7T)f7_K~T4nf12%RiZZza=#AddsfkT<$=fCU}wb=!zEXq zIkikR`I#v#I#==I%a$e1M~>QTF!*?lyC|N%*?)<{q<>~Gst5Um*bon-zwlKye1s^%}SzL9={iWZkW{4V3%sVHTrSXY3 z$52j{(cN2N;nUU2j{INz+tG06V`-@m48C$jmm{XXWO|jMX=fNFxOGcG*nZiNZy_f? zC#|#);9*xd8Se4B`|sCfS05_JFLq5d;hAN6eBGyVkNv7*^*n#ow3O23Ze|w~d+BAi z)T(ot3G1$qBrSzyM&=U&Pc~fC_q;!!@AZF$$?lR|VfP|7NNjWyoUwuT@6s33*KgX( za4y$dlABV!kmY)vb)3ONEEU9@Vv@&5Hck%W+ z7RxgQga0cO-ZGT#zSCWwoqQmFQdx))K z@chFXOUbTG_u397-+TP~o-PWBn1AH7G$Z@k={iC(($gMm3Gt@z`PMCb6YFik8tK+6 z)zBe-`_`wYQ}r_=lg=3Vd~7)_?Ow6-9Dmpp>-P>XCrtl*mz&MO`}N%>hWb~FTcy7n z2p(Lt$dB{BeZj4Woh)CrpYrsLa^##RV)nWxYj^L;>GyZ!XE2pdSm42acD>i^-d0Ho z-fPL*FL&2Yf8!hR`G@WwOYMBW_}x_>jzsM=xH)-e=`#XtWS>rQ^T+hXRdr~*b^zuWP#@h+iHj^+Ei_a4)1_+H^I^+9-wUY=aXvF#7* WI^rY)RC9gQw#RNUH22T+GXMY>KARE% delta 1500 zcmeC=oy{{rr#`pPu^=MVCp;=duJUH7kqbM~y zzuY9b#5X9LE6Ox6IV3pJGsw6ytT@~y(>qu@Jk!wGEHof6xFRva#5X*@BDW~exUevL zay+AWxQB^ZMQ&}H&ju27ks<>()koE+|E`Jgs_)@zU|L`qnVDjiS{P*N>c|zGYHpB{?o{Yq9%!0c8KRwET#=EP z5|mNslUHeyT<%xt9#~=WQF%HC zmhD>XW*Fh=X&#lGROy|XRn8Uu>akAQsf54v4W-lHgg!sxR_XD3kCRafi_*S@7d~h? 
zHr^B6``=o~@n~gQ$28vLrJ~zc*X__zy|*)!SIBB#^m4}y@~cCxeCUWOI$?cN`|78} z)u*Pc(btT)!oD-(9^38*y>fPk)-E+qReqddcwn!`>XXxd==`=k{_%*<3Cj+bw4+bf zOw&mEdTK*`;If8AxmvB~@?CZ<4&HTdHiu(XmSvH0r~aMO6I-=%gP8A|=~}#)QpeBu zjK%98*X5Gsd(C|g$a6i6%IQex7MGd-)^g)53>!4*js6adLOD;`ViC&2{SMhs|AFQm7OnF3SAL z{*lbK;N|;@H*Q#wEO+zc`xp(MRZr?GKmYj~CF2*->G#^<;0}4?YumV=7e0wiS!>S0 zV4G#U<66u7e_yM$jwmdf*ya|Vt`T#8DubbS;7Y!A+zc^YOb^+!ZY93ou#W$&XvI;> zIH`9#ew*p+e8}M)3Fj7u3y$3F%lv$2>@<39 zomv0mThO(Og$38xo`ki&5B;=d#z{3BPsggyp7tBKPd#vY)p_X0s|L<}N9IZe)ZfzD zdG$}Vxws$8UERqckIyJQ6;=;c)1E)qGR>!_{@=-ISA*)5R@p|HPFv-+M}x&$P*wbw z*S@ur6sCBE+}1c1<;k5N=_)Y$`u>$GA64HMxHrM$lJ(XzjSqerd_6H&V#?dkcb;6B zo4&azz4SW=$F%s<AZjL@0(Q@>V0=Syv**v`tN+D1ZULJJnq=C4k7Q3 zN~WK$FXkoAy;~#PGb>`QO!f0ybM3d)`R&h)y|RNT_2IVIW^G=XrJv&^I+-Hx%ZMGh z{_VMX!p-+4QTwNAOD-~-tNU(+?&C_Q$b*vd0*dxpJsWP;#4#6F_zK7UT$508vPtml zX$!eNYmItQ*+=E(+E1je%UJO8Rd%Vo3bUWLj{WK14Zj%|Nqm^B7~VQ7jFSwoxmLGYY^Xo%@ m+^>GYyX!Rh%U3OG^^lP?T-emQ$xEa$+e=_0^Q`EjzK#Ieq=|!X>mx9 zw~L`?d8U&iS4D)MeqKqEd0JFNg+*ntm%f?5TTrU2qgk$(XJvMCkrr93l3V9)(sYRvQ0ha#W*)Hy827XDtnaRmcq3K@U#*T#%URma*LD~k!`GFM% z$IXeecnx&Lh7W!x#mV1ZhbEQT3l$tqu>xWc% z=H)x*>iZgcS6Gyn2U>=DMdqXh6(o6tTBej+TIeT-PL5|34=;%dF${7IPS-B4axth3 zG0!fobPDk$%qMSm-FwZ@gInD%}3u9fDyRZsK13FO>An>puY pvg6EC*ErMrbQovz{D0(J8|dg*8)76{`h3w9#(ws+BQD!l007hu_80&F delta 627 zcmZ3^wv%;&PJK~vQe|YQPncz}es-0Kf0dI@pl4WGWqwJzcTrV9RasVmUs$=BWpaK< zK3Aq?TDrSqWJo}Tmq}<&T8VK*Rk>e)n?;$0sbi{TYJjg_c!8;9IQr+Z16S#oxPwx?72 zpB1f-4%Mi;H=ZG}VG@l6fY=crK_l!vYilE?hF4L^w zAit1IgJiSvRQ-G(r!Z>vOG3TV zO@d8AOT&HLT}vFp!_o~363xm&s=PgOj6y1nEj_bb-Hmcxxq_U7v%}mf{f(kZU2+^f zO7qe!lR{E`Ee#VxA}aiXyv)rlvt13G%?i9t(1Xc0*v&X4T_MmX)7`SzI5$1b-^?Q~ z&_cgHC^fSx+dU_zI4d$W$#MG>^%F@8e*DbBcAe2j2S63k|Sii{KKfu`2-Q6O|ILXius~>nPJMEPS+-+%h;g8+k4vUORd`^knX_}Tb9$<0WR_t>L1KY-V4;hXMOu(+ zF_(8yX@G}wMPaacd8ALWOHx{zN2P~>lTTVfnt7U!TZmh_dqF^%yLNz2I+w1ULUD11 zZfc5=si~o*LUFodVy>e?Vnu;TYNUIjeuY7pkH1%5kz2ZPkc)-3Q>ABWV3b>FKt`T+ zmO*&FM@FRums_E}Pp*fLc14M!udz#HpkGl?R46qpxh3I- 
zLHYrH!D-1B7DXu*dFk$DSw^mjDS1X2`aV8I=4BSCY5w_zrWF>G;~B-ny^{?M3^H6Q za*WGUl3k1feWOy-d?WOY1H&ys4719zv{Nn94gGWc%QA|&%$?m0+#EwgszMBWBQ2b> zQk|14E6U2EvNOD#3v$x}(=CgNa{PUYg3{9`pJf!Uch1sJw9L&7C@GFAi*QW#F9GfqtL32~_`HxJJWED6gi%_#J& z46z8vHup_Rsqn7w^{EI&j{z?)L!a<;g@6!OA15Otb8o}6+%ngwD9?HWpW=+Fs!*3= zXIB>wUoYP>$JA2i5Jv;2Y%Xm>i$vd|bdMDC%w!M4)RF=xOG~q`G|S}hOdn7G+@LI@ zBo}?{D(4X2Y;^0AU7Qk=92LT{or^pzBE0iMiUaj4J+k}~%hU5pk{uoM4MKfPOCqxJ z>P3-je@)#T^zF`LIZpXqq1_m3*B7OU41MnEK@99 zLwzx<^ES=)C|7VzF^J4_&haZRt0+ych{$uxs>;+h2+%Gt@{MpQHP{x5#&~40LvMDNZhOw8%>>G|8wmF7hx>sxa|z)-Ul)2{l2t&e<{C z-6T*UL%*QV!Ynt#&D_GPFvT?3Ik~vR&$%+)(ahP>GR)tg$Spn0xU$Tm%FUB2D5@ke zr!*+pH!VM<(4!*J#l$eIJm0mVKES}BDAz2nEThuT!aUEo*r>u4-MR`#b7M%S8y>ebn*BEzs;bJ-cFoSt=F-*GRd6gw@$}PAG;sE>&kXh|a`SX4$TG^a zG%t%twD55?N>4Noj7Ur?4=PA8bL9GOsdCV%+$~{q;gl$GQFGomU7^bhj8W#s%8%<#Q`sjM(z6-lg{CIt5#-gWQ$68}eR)lCxv|H%s?{B3z zQJIgKl{MF7%V+6@LEHzn)_q?v|K!#kEmU5F=!_MTJmjsr0 zq_26`9pat2T8B-++u24ezw+`%#rEmO2^IAc)?bVQESFRtI$tEU)1lRz>&jQN!dJ4b zu2D&A)x__}2_W7QQnZ4gucE>c6{4b_CC)V<7o5=4=4B`_D z%`fsV@93Tvop^lnhefN@WLE`Q&&<4gY}&hXS&R=pf9UItG+lFBDR7Qi;?+5Q>aM@u z>n+M&P|sc@F!S)22p@rEjjD?dE$h%YclhD0^>fz7usry1?ARv1V~6ziDO;cA58YIe zvHxxEucbU5QkgFwFSBM}w>;3ZzluT9}Qi$ zI(F{4@{BJ1i$AO-txB0TuU#$4TKasOnRJ!!5{@O;Yn>ZxXP*B1JSlT=gZ)YGMfcD3 zK3G?ga56p9XH8PW8)0c6P{RnoWQZRc#AmKm$-`EiG}BYj=QLfpJP z-*%i~Zc%6TJ}RY9@8f;g_V=q*KFLeU_Xj)uws3ghsu2~nS8L(=sFo*p*B*B@A>?YpH2svY&gfie?W<)_%zN@}pnu|qtnUS+)iCa#RdA3KHX@IkZQL(#mhO2*w zhmWC)xo4mwS4feCen_ZAdTF+qYe`r{WrabxMTovnWu-@vOGII1a9X&jOO{uXx3R11 z#E;_P7M77^PDRPazM+LVPC1U+Q5gZL#i`*XZk}FcW{#CEo`p$XhDE_Cp?SVsRr+QY zE|n(PenyE-IVR4Drr}w}Mj>8F=A|j_mJudJ2Ek>yh8{+qfl(%t;~B-nbHlQHlO2=X zGfGo(^iBMdT?2e93M~rUlU#H3T|Lu%gM$6c+{28^EvgK-0!)M4Q$31Pi;PQhf(re@ z9sS%Ry^2f=%Dr<^z4H=_9knx4EXt|^{ftc}pJf!U56v;p4%W|d$_eoHD=i5v3y%n{ z^b1JKc5(5k@=x^ej&wGw%-0SoDa**_DhWslwg@OMjxaV#@eC}_G${#p^$l|JD)9Dj zH_bM5vrP2Y4{^-%G>u9|j{z?)L!a<;g%s`V;`}gw?TDnvP)8HDAdC8JXBQ*a6vNQu z^0I>P49{>^e>1Pr$RLYcN3P09SBtXLGXK!*#58Sp*MRa|Ggp^z5ARH~vMN_??GWP> 
z^R$xG;=&5IY;^0K9mCyC0u{1z-MmbUlL9UC0xI01{7kBRJo21foQ*6JORDsPoHCqq z>Wd=Fj5E?JGA+2wv-B;J(#=zYDvkUDQ}lDv%e|Az3taroLvr#>$_kQ;iwd1hN^{-J z(|pmbt8g?owscg;b~ErWuPThn&@WFi2{cPe@(s$gEKW;GPw{tgHw+0%P6~0-_m3*| zDYxKqPxtXp$@fx8F3mMcip&bt4$4g`(zY-TPfsdy_p>NUGV?4B_RX&h@k)xEoX8?x zUuc<=k>^zvq90i7S&@78PV3VeXmbm28$D;B975lIs-6?U}OfzotBbfP*zdSrK_u}P*|9mZ=~<+pOsZ= z>>pO{rJs|P73LU{T~Op4Qdm_G?CcU&pB|PQ8tmki&$VnCkKoCbM_a-_t+Lu_c6eRN zWnn2j6^nkhW9|Yi%P(H|wJP4j>Q+K!$eg>9ug_I%UUhR8-q+j`sLsn>r$)yVi83u;69uY*VM}56z0UEHrz($>)qv_4h?TN_+R8e`RT(r(~9!W#+ZE z{fgn6>vE5FXQXo`)~8%u)w^!*CvVkojq+Ei2VOcQ`C9HSk7RJ+k-C>}9{kaKxu@ZE zgF1#VBZEi#*}1~SzuxJ&kXdlyM$^Y!Uf)c6`%=Z?xv%c;vD=)K<)b5|_UYyE&kHjD z-u8d?l;K*@%p6TM1I=%90xB#DAG`V$&3kp~pAA=^#96-Z(@859`|;GzsN1n{A1l*W z-NZ1-ovEVR*6-0ff40w_;p@_ur_Zc?*qku)@%jK;k4Io zvs`X5t~NZ?k@V&s*TWj#Q%Uo!7Zq08I8Xb$o0-FU?Uu^^XDTO)&h;6I)cxMBUN65n z$01B7mH&s#J)Jx0*85(qvC)e8XItN+^PMY(@=6qk7HtWd*rsoM^Sq*8M z7`vBCZ2Bcu`Qou?jd}%Nq2Q8*VYMRWZzI(o^(b$Q;Ge+sc+yYz-|8N@Zc5jeJ$d}u z(*Nnod&;6!Z~vDWe)|0Ga=rfDo)4P+JHw7?{@BBi&)>W1zLbuTfW*Ilx2_kO?XNre Ra@LHe1(x9-esr~7003vOQg8qO diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age index f36689722b0c85a09641248e9768b8b67a758c5a..dcc0b5e8cb1488046cab67136cb15e047b5d48b1 100644 GIT binary patch delta 777 zcmey&`jBmcPJM7xW{$RgroMNUQ$d(XT12^%Pex`?N_k|kV^O7_nQ=};pqp1(uwSsV z30JX8K&DZsS3$Z*p>KF;vWKT*h;v@Jt3_g3aBz{cc2Z_#VPRNakx7BECzr0BLUD11 zZfc5=si~o*g1f0tglV}#ae!r5aFKqbV^Bn8Nw`@_QGGx~sCS`pvX_3SYp!KtQL?^A za7k{OWsr*}SDL${fu+7@euzbAL0NFIaix2nTV_~6lzx!APo8Cldzn|Bhhbr|p<%iO zx^>Ckrr93l3K4l#DM_KOCN93_7LjQtx$fR&6+s^5B^72V!7h;&7D0ukB}u6vNqM=! 
zu3Sk0<`zK#Mrr=~;pN3XVUfPs;o%-R#bvH(W<~ni$>E`qS*~7Lf#!kv1`|JuhnJ>= z`iD5@2YZ&MMU{HGdzKmJXE+-;8yC2FTeugN2J1%zIT|NMg!+Z2bEOrR_@_pBnuQ0I zNBQPC2824gxVn^OX$N|j<|n5*dglA)l_pwPxD|wjPL5|3509!WOshyW&8oC438=~l zN=hy*Gj+@gPRuMxHV-okO-{-v&-5sFboB_z=Sog9HFx*OD=!Ms4hl9)HOdMP%@4}e zb_+7|@{Y(#32_Vy%!<@b$@legoP3s1yuQ4wD6`l-$|K#>z^t$+*{34UAh0;VH6$b3 zF+A6}!nY!z)Y7;x!zCoblPlEE)U6^nC?F^z(%8cy$h9)CygVmG+t)1H(l^voKh!bJ zSl=YX-7i1KgiBXfSHUB#DBQ$L+g&@nHvu~a{CUzy&dr^42jNr15>>e_cTaJ7XWY4$$w#hbq!>-iu`l_!1PFn%4p%y0q delta 685 zcmaFJ_L+5pPJKyASW%Uav1?JKv#X0&aHeCHM_Q4wqqA92My0opPpV~9qPKUVep;n# zHdnT1l2=xNett@5cA%xJQCdVnwn?YNfAVil2{1qy60hg|wLUD11 zZfc5=si~o*g1f0tglV~gqd~coOL=~IQDCK0Wm;xlm|Iv;Wr2rjSY=tIceb&=vA>13 zkyl<=da|=8SGG@PL|S0Bv$jQGWwK9YrLVqOileuyt68vnMR1f?uz5hKe@I}Fp+SV} zk8l?YLw!qUb5p+v{emjT$!8hG>m$-4-CRwx z0^G`y^Fw?iO3J;`i;cBC0(=51f^y5M48zSU3ti2#f+O8sxlAn0GhKWOGd(K|OSO}o zN-eyjT>RXOf&&WNor}T>v)ofc16(Y!U9((G(1Xc0*v&X4T_G%~qM|rAEx)q7Ak!q= zGCZ{2w<Fw`kCJts5L!^J#2wcOIt!`Z?S69Ky!!yQ6*~4u7`(DL~H&1&d8jZ_(`n#uf6SEj(7jdbl**x>N?l_*hPVp z24B7k#P)qCG`OW%sN~&n@p`pWRDJQuN6+qRJ ssh-ed25519 G5LX5w HlQ4V8lBd3im5j8KHEuQZBTuztvPj1QoWdv6FL6qzGI -Jpt91X1UIIVFQt1X6Q//kALn+Cetp/LqBZZvTuhFthw --> ssh-ed25519 CAWG4Q StnngJAcuAwUnTrXDR3nJ2KFN0jNdTqSz+/1TfmWkzA -CR4AQ6fqaJVY1mdUIX1gzaZwRs1sU8F8hHztnkN8vN0 --> ssh-ed25519 xA739A xya5A5t63Owx+VrGgUfV/lIP8b/xV1cerMpuZBLaDVM -w+pA583yUnFq2AvGBGzWbQIGQEY9WqW0CSLQ9v+SG0c --> ssh-ed25519 MSF3dg aXkLxCyYdOwVopHHmpXEI6WlAIizKdJi4IO0KEdhS3s -WKXkTszZN66+QZdSDJ4D9q7xgYWMfliOLCubIF2Dqkc ---- uVWoU2lMkqQ/9Z0BqKRCeUpsKi8lwmHukT/FV8wYMbg -1G+6g[|x]2Tй CKu)]8֓lSQx#7rk{*3ս~CbڵNp]J]hje+d%Е#m?=6} \ No newline at end of file +-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0 +CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ +-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4 +JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4 +-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg +IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc +-> ssh-ed25519 xA739A 
hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg +fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng +-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0 +pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE +--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8 +\ֺ"^DTH3_|.h^ngS]_?nz~2!p7<ʨD?~F$`q+SW(+Pcu[m`OܛϖT \ No newline at end of file diff --git a/secrets/tent-gitlab-runner-pm-docker-token.age b/secrets/tent-gitlab-runner-pm-docker-token.age index 42f0530..863144d 100644 --- a/secrets/tent-gitlab-runner-pm-docker-token.age +++ b/secrets/tent-gitlab-runner-pm-docker-token.age @@ -1,11 +1,13 @@ age-encryption.org/v1 --> ssh-ed25519 G5LX5w sg9SmahxBg35MDIxhrp4oHkaTaxsKoVQju2eNhCt0BM -CZ64dEGqz2tbkG8KtimZvLUEMrQpVVBJP7Fu46WTMgc --> ssh-ed25519 CAWG4Q jzS1R14W1CWxdziMLG/yCGPLWSkiyE+9lqyCVe491ng -acJo/nhKq3pSPoFEPaFLN1fzHHbEzstNoLtohWAHKiM --> ssh-ed25519 xA739A qeGJoLeSIQwLU2Yg+Gi2bikHJ3HscLfyo1msqL3JwHw -tTwaxRBKTl/SoyY/LnxR/j/5WvCNX5VeZLKi018YMrY --> ssh-ed25519 MSF3dg Wym7Uyf1XvH1H6mNDERkO8opkMiN0zzXm2PjXftEOWs -Uw8ZwwKIB5UqgVuoSLE2QajNDJZkH7/Y3Nsy+WFl7Xs ---- 94hGVbYiCGZdMEJesCMLh7IZi+w5l/Kr1lZJHQgrc0o -j5j磛Ja]a%drFDT^Qs/kwB$$H'w ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM +vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE +-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI +/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng +-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE +JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho +-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA +msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ +-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec +SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE +--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o +f#,|Ey.vDLӺJPX`-#FUbs(Q!?#xJG?5~6MA UCM$+}WNϨG!a%ǽG \ No newline at end of file diff --git a/secrets/tent-gitlab-runner-pm-shell-token.age b/secrets/tent-gitlab-runner-pm-shell-token.age index 2d957a7..74527b0 100644 --- 
a/secrets/tent-gitlab-runner-pm-shell-token.age +++ b/secrets/tent-gitlab-runner-pm-shell-token.age @@ -1,12 +1,13 @@ age-encryption.org/v1 --> ssh-ed25519 G5LX5w 5K0mzfJGvAB2LGmoQ9ZLbWooVEX6F4+fQdo1JUoB3FM -AKGa507bUrYjXFaMQ1MXTDBFYsdS6zbs+flmxYN0UNo --> ssh-ed25519 CAWG4Q 8KzLc949on8iN1pK8q11OpCIeO71t6b0zxCLHhcQ6ns -uy7z6RdIuoUes+Uap3k5eoFFuu/DcSrEBwq4V4C/ygc --> ssh-ed25519 xA739A SLx5cKo0fdAHj+cLpJ4FYTWTUTyDsCqKQOufDu3xnGo -VnS/WsiSaf6RpXuhgfij4pYu4p9hlJl1oXrfYY9rKlQ --> ssh-ed25519 MSF3dg c5ZXvdNxNfZU3HeWsttuhy+UC5JxWN/IFuCuCGbksn4 -vcKlIirf+VvERX71YpmwW6zp6ClhlG2PR4R8LIN7cQo ---- pJKICDaYAlxqNnvHIuzB3Yk7tv0ZNYflGTQD+Zk/8+4 -h/\JJ -0? p@܉73za',kaIXXOZI\ BP/cUɿ~BS' Qfer^8lVE \ No newline at end of file +-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY +RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4 +-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE +Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM +-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE +FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU +-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU +sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0 +-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc +8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA +--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ +-b#M.@tŵ}+ό#@ky?vnT+[Q gA "qh]WVoxD](S%IU_f2d[֐pS` \ No newline at end of file diff --git a/secrets/vpn-dac-client-key.age b/secrets/vpn-dac-client-key.age index 4ed52512f11a537b76b59a154feac1887cbd7c04..c414fd70b5ee93365f2e81dd647bb70b21051eb3 100644 GIT binary patch delta 2298 zcmX>mxJ77!PJLCjv2jI#ft!1FS$;)Dx}ir=n3EtcDBB^hmpIbX;iXH zGM7n3MY?`saALBnxsh3kzN4RUrEx%VSz@7Ss!yh`znf)ZS)Qk-N4c|cD3`9CLUD11 zZfc5=si~o*LbA7Mwnw=_h>N3vcZIXBiG^=uP(WU&S-nfCrAtYYdxd^cR#uT?rE#`T zdbqZJL6V^-S7nN$sb5)oV7OzEU%q*$PpWZgUZRn!OKNy&X>v-imvM5Qk!P5hV@4!~ zbE?7Ums^x&6nO=kC+GO)8b>&C83nm#hL)AO6sA{I zMHFSa8T$v9=(|SbRXAsdm3W(1`UVtvo0Pg5ghb^}j%O6F_w_GN&UG#~b=UV!Osz2R 
zEe~?fEcTDgNUw+tHx4#Q_b)Ll&hYaMiEvHlafaLEiQjBqmb%<&6LO!CkV^>j6$ zk0>^GbTl!mH*vGfFQ_teQCRNb58vn z^C$VMC+g=$&V0yYGdn!Cv%R;@*vjc#S^KwtNAIojwmbUBWQ|Paiml3Tx9Qx|X1o~4 z$kaB$vEDZ#M}$AWWnI~X=Yic%Cx+?GzaaF?=gWTmRfTy^Z}`-6-YHyDaA8i5Pq64- zyEiVsqtBQyd0y+E`_cH*?#NevfBaVOcqz7X?d5H!-^S%w)$NdB@%XdxUxDz=4G;Q% zux@@@cTMa__@|?sQGYIdy!$iWd1J%1wJwSoNww{~WexK}l*{!y?{)j9o9A!)v(!6D z$ww@QG5e|Mo-o(_|BtVJu2UbnWbT#HJeKXhCP^N0)a;rYYQCfQzKKiVg{>QR1P4qn zKdtA;HYYt|i^9B}cM?`FYfIg#-;iiE)q3{t{Q-;OYo+$Gd#u_jFIprWbV|EVPvOJ2 ziD$kj&iQ=ehz9PyWu_wbg%_#DeQ4NXiw@QI+j(O6^MOum55r zmz)=_{@H2cH831`bp;|H8>pmr#i9OsgcWQ z*2(8Z)A{G_)v})ZzdYsFlfRExa)hqFKlCf6eqR2!X99OLj^1yVoi$0!y5`q>mLJCxoQ3jp`3hBs;ncaHiA98~+d4h`B z!4zTW8{I9aOPTiY(DFwAock+^4?$d$dO`SqH)ozI`K zKGeLh^jN07_u^jd#L`y1eVsKrvp(l;pAo1WbUmU`hHu48{_UTgBEHL=_H^3BcxTnm zv>$l_F1)M5{~XTV&ttE@MfFppgaMn~%v2+nNQ)P1_7%=es9*5uiItd8{lun{^`~Zi z^y6&Pw0(6?cH0aW`{;e4?}DV%uT&h_sP`pj-^4F%Jq=TNTUIZgA89S1y;ShtUB-JM zhs$p*D>u;Fs&bX1XO|uO#q~0_>(s8N%hXCO*}36wYTo+p>Ps1qzE!^|-of%;Lb&{0 z7q@-L5Bu#wxy8HoZ(Tm6UU4_qmV-fO{FoM&2-hWR-{WnPShD@w53ehq67N@+XH-fg z2DLuE#?D*SBg(=X0GBj#>3DK3z5H zoVe=R>J8lCuOsGUo954(-?PBKeO+a8{Hx@Sqb)5{f9%PO+AcM#rs;`SmyX1|4!J-fp$BE1TOK|5%FJ$eM8j5u$^H-gkj?J0D(M;b zS)KA{^)JqK27YN)JN80P?&a;sZ>|Z=k+XCTDW(+fSP}o)$9(>}`VfnSpFhlv|CCzN zJUe;*1*u~~lXKbACD^(*E8mD^;kwY}_A~I!r_09{G0m*;nO^zhEVt~cq>`d5T=}jB zv*a#FO->1Uwtwrg+@@dy&8ua5{@HANx=xhu=J$|i47LUJvsX>8)=To*!gO=aI_=`H z^^?^2yK*I-rhjqp@H`&4;#JP1Fva-|7L(Fe?K_h2>+*7;4aFzb>e<8_;?L|g&fPmT zE8qWW#7;*3D~tE6`FGoV{ePK18H_6q1|FT zYe{a{6ZFWiE$r+E8jgY^v~4tJ93W zm~{Hz)2!F=(@yI!Jd2o7@oR<5=C|uia}566bE$b%xP|*#WO}gm*RyGUYrQqo%D56L z?JVB!&)}4qSlNEAH2+ar@S-UBUArwgMPy35cixx3B+wIE`AEih(q_AzaV_s|uQU9< zI-u{x=TFbtrVF^G?6|_l8U4{wesRRs_babv$Z17Yw{E+-apJ$}dmk9H{S7yl+ax(> tfDX{RwjijcUwXNGZv)_s3cLg8-zR9FPD2 delta 2167 zcmdlYbWCu9PFPWnQDJh1WpYMom_cBrc3zsXo2P5Op<}VPk7-13Re^p+q-mzUhoMJ= zBUeOmP*8AUc5abNdZvZBc3yZ`zI$Y1NuIfhL3nOOSYlN{Qb=*Rqic}+#19hT*;T=g z#*yw3*%nEWsm@`>i6)L&VL`##p`|G%-o}}3zBzuT8QG=&p@E)UNkJuvNhu+LrG>es 
z!D(f_A&y1GMW)`~ss8E3C6UQa6?sL0zQv}gkyXW$;~B-nGm|RZEmI95_1&EULQ(=! zo&0>XEvpiD^ep}lLPdf%uCB$w9S0Vf|9(m z%yK-E^D;t0eN$3{qfGSmQ%WKypJf!UPc+XCD~`zU$#TrE$gwERNG|uwu}CSbbaSgP z4D(K|@G|l7D@`pkb2hNx$}KHP^Q`g>OUlkJj4Ck<_p}HP@J#a$2}_PLbSm~OObaph z@Qv~?GYw7V($&>f2rjU!^zb)G*G~&e^YKlpGD-_C4JfV5P4xH7DJ?3_ER3wI&+`q* zcgwc)-V#554{i-f8Zm}KQ5uDTgEnNv$t1RJl$pU zRo)}q|2fOWFPF<4gm-+pp})md_nmBfN!v_QmwRT#VyoB%a#9^vv#rRvQQz2Scj%_? z*;{k3>07lce*LGuc%iO)+`b)WxOaby&#dP;sGR;uR3gl$TRE9IzV}J=vS3cP@P&KQ zi(t zOp|n&{XpWgh5Bc(Rt=#`z^ z9#OaIFV8)&$Bg%YZ-w#2_4h>Y&+mD=(`D%vGskPU)=a%BFR;Zb_uHg&>3xSpQi5_< z?~hBIvrkumo8|PUneVo&4dS?z7%BJReZivN{Rwi_oEde=;Z}k#ii~w zvSDGHg5idF0Xehwzh$%99Xdr>>y(+Ag6=2nfXhYOJx-?iH3?UpdStJjG1quP&C%KJ z6L&CQGL6)`u>4((roo>)@v^DfQ>Gr>)5-25vgZhY`kifS!{p4a{$ITD(nZTUmX}Ri z_r`ll+D(J8|Ng6 z{$dqNS*R27FT)~S>HbmPk9B1~ZS^jh3%GFJ&joXa>79Gc8=; zxgaBcOW&+HU*v)x1r*$v@JhXPqGSN`OAhV06AMcsU(H}zyU?KN^Xhc&DPix#t^7+I zOf)%V9S^QvqxEC?_1|er&bP=(@z!s*t-Y$^7V9(d(&4WQKBpW=&04nLu$ZY-UP1Wm zL_>QIyS02Gs)r`)_8I6;a4D@kId_tc`dn#Q8JowvO%ZC_Zu;G_vRoyzE!alYv#xag zzxg}%^74u$?~$ry7jIhI<1cCWKt!_Le%DO}KcUjIV#`f$ue&K$9V~l6GKRT1sQz00 zH=pve{?cwH4z(=l`NP=PX3ga2o=Nx+sHvaj;^7h+q-b?jcSI^%f%D?3Eld$=orJI|CW4;A^ zY+b)C=jO8CHhIOiZ2E=;=Xk`N*6yCmaY%fj&b9@UKHirLwQJWaNpx3C2)0hLXT$<+Zz=T&CFhG`p)=?tAQI8z7)z z_CjbAuKTcN2zH)qJ7w5NCtZdP|=A&O9d&?!&?%WY){iGmt)tApl zu4>ooTyDG9!eyVbzOeg;yw5q`fY%c~t@~x9xxLU_U2K=q(r;`&5nJ!h-m!3dN0m)n zUE&Hs_5j-e(Y=qlJOeJq&J5k4Q@A%*$*4$uTi2ftWy@Dr{ne^}HO(WA$)3x)PUzOs z$y2*03gxn@rOwrjw4c;A?ahrJ7d}>VZ+&uNcj79yH2#avHxxQt2(Ua8aq)H?pROL0 z^M{5lR6u98F%v_LmzDXRg>PUq5-TR=ukU0!uXwcL)V)j2LIR5fW@Jpb`G9NJKc=|{+%AL#PF=o; z)62bWuYCU@iNAKsCF(Sbwg1;>wkw|HTJGoSpR3IkYw1B zhR-^j$)W#ML^jk<*8ZKgXUB_kQ!a4Ko$6N=_0wnl1^1o84)^qw3?D4Xyy+iN|7y9T zu+NQsEW28HqE?m7h;QfcREkxdyU=w1rjK3UWmPiY#0DnlTzL9{d;gN`c@F2=54|Y= z{Ab3J$9$&(d`>Jm=H?MIWt(M6deszFr(3S8?$~++UX=IU$9+RPS=l!txC;L8vmWD*eA~4lV5&@cE54U b^_xK^Y5_?%eT*Jjemi(7d0+R%SEm>O>qF5a diff --git a/secrets/vpn-dac-login.age b/secrets/vpn-dac-login.age index 9482a4d..6191ec7 100644 --- 
a/secrets/vpn-dac-login.age +++ b/secrets/vpn-dac-login.age @@ -1,12 +1,14 @@ age-encryption.org/v1 --> ssh-ed25519 G5LX5w /RF8uZ/KahUqjEFILbF3+Jin+U0SQdoQChcc9RJ9axc -aEmPk++86nBR6d2BIa/oaUdyiLS6cH8TUoYJE3bxba4 --> ssh-ed25519 CAWG4Q qHyh9nQi8c3z/KHby9y5vhzN0Dwz0zca98ebjJmXrzs -ZbmwNzrSSQ3RvskE8SqcBa0vMy8pzm/HPGHLm5zuPGQ --> ssh-ed25519 xA739A FlGbfS4bUxA3gVDzb3yPjp4hV8a7aiNBLUctnN3bGEY -3fI6SyVjVhh2M8uc/XV3blpdQMPMYi2qzaHNXvx0bvM --> ssh-ed25519 MSF3dg 0Bs/aW0nNISS+93It75o6hKZWa7S+LF5bF5ApsJ2fQ8 -y7o0KYDHEen13ndIxg/mYil3eMxxzvYF2pWqhMb+rBU ---- Iqo75G4+02Y9nc1OOkcEx+iQlKnGYCekAx76tRH53wA - -X%f hX Rc+zeg& dקAчXM1 \ No newline at end of file +-> ssh-ed25519 G5LX5w SRJhNenoQXbT1FgX3TMPnVH5P6oe2eHot+M1YsEjsEk +hfTSLgKi98Eh7JK5o7x2POpTEtQlQCpEa3keUFYCuME +-> ssh-ed25519 cK5kHw z5TwWJTkvx7HztjXHJW/aCOtOfPrQaLP0gyIT7rXcyU +b4NCpHfasgvkLLr+6LcWUl60p59aSNnfp3bl2OFYXo0 +-> ssh-ed25519 CAWG4Q 4VpS1/OnFe8nxcQbRTKNhjsh/ZQ5cbhSMXwK/jjQ+3o +WF9wvOkqVml4UcEzyzeumKuUwCwwr2zvKLMg+PCB8nk +-> ssh-ed25519 xA739A 67FhuJ070jBVMt/xbKHWhfri6iIm0FyaFvzQabsvFBM +1G5/913dDv/r/6p1x/c5YiUnZzrX/LvIj33KW+PN0KU +-> ssh-ed25519 MSF3dg Bj/yB4N2wkyHCHC22tcjjJAA4ebSamN0Z4UVX3ZnryI +6D/ZgTs+j+MGDAbPU5zyK0i9zN6tQy68IcOnQZ27mYg +--- 169erk3ICSYLs4FPEuXCn7QlekWhsmSn0Lr+/R14I5Q +ҽ3s +w4Db."|)";.ɫ7)LeC=S؟ \ No newline at end of file diff --git a/secrets/wg-apex.age b/secrets/wg-apex.age index d61c88c7fa5c13b8f041a848475b2f2c29554ed9..c22c16735cf39e56f9758966195c76bcd5b31e38 100644 GIT binary patch delta 607 zcmdnVx|4N+PQAIIL1dYeOH^8wse74osZmBzplfNeL1a*No||u4R#b|6T9I>-Q(AdQPF}9Q zac-ztW_h*=mvL2rbCF|Wl80|qReF(6mT#y>PDXiVX0VZ^Pp*@ZiFrwQvR7(`v1Mxh z#E;_PPR=3T0U;K-=|;h>>6H;_kzSQ<;XzSp22~km0p%9X!Nsoru4RTLPOj-(+J!zv zQU2wH+L6Zoo-QV#ndMF;c~Qm{rI}7i&V>PnIRP%ES$TQ+CfUW4;~B-ny-NyR0)34t zjRSMC!pnkO@}qpy3JQGlTuP&ivJ4F}%<}S+3JWugjGW84(#_4&d|cfPO#+;Z^3Ao) zid-s-v>i)5O0rG-ESxfY!-E6E(p@Sl+)T?SpJf!U_w@FxG*8J&PN@t|GpYzENQrPv zHndEM@-c`=@l7l9HZ7?#FEI2>HV87{Dh|pBs7#DBG{|+$s4~yB3=1uAEA;YBwKOPB 
z$qdO(sx-_i$@GsX&veh{($&>faL)_NsdUXuDb4qHbM(n@_s$N=ar4v8jttOu_ptOS zNX;_}bM>kWchfHl@0ASiro>cl<6mT{7#womiY2hg+T*y6Sii@^Ox_)X#n5S!TcuKHyQf_2+v5!lMNw!H!epz~s1(&X!LUD11 zZfc5=si~o*LbA7Mwnw>wlVyr_L3u<)iA9!6n6_DXnVW}yMQC7Yc|oL`Nv=m|zI#Am zqKkQ6W@&^8S5QTyx3)o%kwsxhsE?O-VpL{uptEUZVR5FuQ)Eb#i9txPXGTGJg>$yS z#E;_PiP_%HRVf9@`iACJ{wY4$c_E4AxdmS3#_pvdJ`q_iIZ;&>&VlL18SbuJ28M?I z9>$J&uDQ9!W)B1_7h zGK%tx3w$!XOoIdS{amu6DzuYw(jvX1^gT@s(u%V5OARBrip{(u4RefA%u-wf^Gkh_ z%6$wYP0g~M4MGbnLkbL2a>FeReIorLy|aoZpJf!UPtB+*%+JdTE2uE`b4#qq4Gv5; zGYhN8j&cn(%`;ET^z<()Esre9F)#Pzs`Pa=C^Shjb+gn@_4dl~OV>6ib}w}gaW62n z2+j9%Ow;!Dv?$0jN=x+R($&>f@OCycvdAmWsZ2>JGp&d!&oFTb%`(h$FZ8X@&kffu zsR+&si*(L%3-Wh~g`{K(Mx3KvK3_Xt_s*N4QIxsgZw%tEX@J z#E;_Pk*O7)9;K=Np1!%BeimMyE|q?v{@za6l~wxLh87-$Va6FD5g8^H-Z{Qp6&V(x zUR71$A!bgMe%dYtc_v;(zJA47?#}w9$@*nxK9zapSxzohmVSYg;~B-ny(@#=jYA?* z3e%i@L$kwNUECARO`Ht!3(QSSi;W}PBP-l|Djg$&109pOOkF+oE0au?2aFmMT}N-798t4g=v($&>f2u&#}%#L(7igGM)EG}q5ognHd!)<&D8t9 vssBH7=j^)$=lt_KZMC0HZod#8@Bh6k(@rB*rui32=tTGOW<0=IJub0)7 delta 607 zcmdnVx|4N+PQ8a)PHI6?m7#%qepYBjdPItEx}m?nQ<}d)sDVLoT6k{0acEUlcu;m` zK9_}YdX}q)mv*GFhrfxBS7~srM~SC_zmacNL701PvVLAjwpnpix?e!C1(&X!LUD11 zZfc5=si~o*LbA7Mwnw>wsjE+vX>piIxu2iAPobejrkis{sz-%+flr=cSc*};e}H?U zo4ZFzXrhNBm!)<=fl+!znX8*?Vw6jWo13@4Usjo8a7d=JQ+8%RmV2PHYie?4xofWH z#E;_P&IT13Ug4Pr;XcM*!T!0KxgP#W*=}BWF3!%8hRy{U5rH|b{#j*-VFt-u&N;a* z!6B8t&iQ_Z{s9@q#i2X+@!v;~B-n-HJ^^16;}~ zEs6?V3@iQg!@W|9LtUIwos*pca}%9&9ka`_k_*EF(!$cYN`vwve1d}m3q1VILXEOr zsxqrGjXg`Nl3mmNz5UI7Jq_H_@`@{sOwD{JpJf!U&vy)P3U*F%b$8AR3NOlckBCSM z$t?2AbuvwNcFT8nanE!NiwrPHOLO()@-@kDFAuM(v40O$$hObd3mga`6gI z*AFi6v&<`wEXs7PDo8Hp($&>fFb*xwsqzRh%+$~Gk8=0aHwf}B%yzdhN^;F{3l23e z&&~-7GceRo3HQlK=9+v!aWX@GZvOeTb=Q;E-=11x6YDKsrFU Date: Fri, 29 Aug 2025 14:40:43 +0200 Subject: [PATCH 431/472] Add fox machine to SLURM Reviewed-by: Aleix Roca Nonell --- m/fox/configuration.nix | 1 + m/module/slurm-common.nix | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/m/fox/configuration.nix 
b/m/fox/configuration.nix index ab82949..aac13bb 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -6,6 +6,7 @@ ../common/xeon/console.nix ../module/emulation.nix ../module/nvidia.nix + ../module/slurm-client.nix ./wireguard.nix ]; diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix index 2303830..180e2a5 100644 --- a/m/module/slurm-common.nix +++ b/m/module/slurm-common.nix @@ -31,10 +31,12 @@ in { clusterName = "jungle"; nodeName = [ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" + "fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1" ]; partitionName = [ "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" ]; # See slurm.conf(5) for more details about these options. @@ -62,7 +64,7 @@ in { SuspendTimeout=60 ResumeProgram=${resumeProgram} ResumeTimeout=300 - #SuspendExcNodes= + SuspendExcNodes=fox # Turn the nodes off after 1 hour of inactivity SuspendTime=3600 -- 2.49.0 From dff6eaf5876d4d6a97fe4b78e3afc25f15673a58 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:46:24 +0200 Subject: [PATCH 432/472] Accept fox connection to slurm controller Reviewed-by: Aleix Roca Nonell --- m/module/slurm-server.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix index 6536136..449ba8c 100644 --- a/m/module/slurm-server.nix +++ b/m/module/slurm-server.nix @@ -13,6 +13,11 @@ iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept # Accept slurm connections from compute nodes for srun iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept + + # Accept slurm connections to controller from fox (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 10.100.0.1/32 --dport 6817 -j nixos-fw-accept + # Accept slurm connections from fox for srun (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 
10.100.0.1/32 --dport 60000:61000 -j nixos-fw-accept ''; }; } -- 2.49.0 From 0dc7b7eb3ddf6a30f5a09b57ab5b56c480508c9e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 29 Aug 2025 14:55:53 +0200 Subject: [PATCH 433/472] Accept connections from apex to fox slurmd Reviewed-by: Aleix Roca Nonell --- m/fox/wireguard.nix | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index a16f152..d10703d 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -36,4 +36,11 @@ networking.hosts = { "10.100.0.30" = [ "apex" ]; }; + + networking.firewall = { + extraCommands = '' + # Accept slurm connections to slurmd from apex (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 10.100.0.30/32 -d 10.100.0.1/32 --dport 6818 -j nixos-fw-accept + ''; + }; } -- 2.49.0 From df17b11458f8ac106afd0c87a101c451213b3de2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 1 Sep 2025 11:25:29 +0200 Subject: [PATCH 434/472] Enable fail2ban in fox Protect fox against ssh bruteforce attacks: fox% sudo lastb | head root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:25 - 11:25 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:24 - 11:24 (00:00) root ssh:notty 200.124.28.102 Mon Sep 1 11:24 - 11:24 (00:00) Reviewed-by: Aleix Roca Nonell --- m/fox/configuration.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index aac13bb..198b9c1 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -37,6 +37,8 @@ 
services.openssh.settings.X11Forwarding = true; + services.fail2ban.enable = true; + # Use SSH tunnel to reach internal hosts programs.ssh.extraConfig = '' Host bscpm04.bsc.es gitlab-internal.bsc.es tent -- 2.49.0 From a36eff8749af4b390e0d0fc74c7f1180e4a64bf6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 2 Sep 2025 17:12:56 +0200 Subject: [PATCH 435/472] Revert "Remove pam_slurm_adopt from fox" This reverts commit 1eac0fcad8211195499bc566e6c70312b31af700. Reviewed-by: Aleix Roca Nonell --- m/fox/configuration.nix | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 198b9c1..fc35e0d 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -95,4 +95,20 @@ wantedBy = [ "multi-user.target" ]; serviceConfig.ExecStart = script; }; + + # Only allow SSH connections from users who have a SLURM allocation + # See: https://slurm.schedmd.com/pam_slurm_adopt.html + security.pam.services.sshd.rules.account.slurm = { + control = "required"; + enable = true; + modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so"; + args = [ "log_level=debug5" ]; + order = 999999; # Make it last one + }; + + # Disable systemd session (pam_systemd.so) as it will conflict with the + # pam_slurm_adopt.so module. What happens is that the shell is first adopted + # into the slurmstepd task and then into the systemd session, which is not + # what we want, otherwise it will linger even if all jobs are gone. 
+ security.pam.services.sshd.startSession = lib.mkForce false; } -- 2.49.0 From 017c19e7d07ef53544abf599e7b59020667648bd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 3 Sep 2025 11:12:25 +0200 Subject: [PATCH 436/472] Use 10.106.0.0/24 subnet to avoid collisions The 106 byte is the code for 'j' (jungle) in ASCII: % printf j | od -t d 0000000 106 0000001 Reviewed-by: Aleix Roca Nonell --- m/apex/wireguard.nix | 6 +++--- m/fox/wireguard.nix | 8 ++++---- m/module/slurm-server.nix | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix index bd5b351..49180a1 100644 --- a/m/apex/wireguard.nix +++ b/m/apex/wireguard.nix @@ -12,7 +12,7 @@ networking.wireguard.interfaces = { # "wg0" is the network interface name. You can name the interface arbitrarily. wg0 = { - ips = [ "10.100.0.30/24" ]; + ips = [ "10.106.0.30/24" ]; listenPort = 666; privateKeyFile = config.age.secrets.wgApex.path; # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA= @@ -20,7 +20,7 @@ { name = "Fox"; publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; - allowedIPs = [ "10.100.0.0/24" ]; + allowedIPs = [ "10.106.0.0/24" ]; endpoint = "fox.ac.upc.edu:666"; # Send keepalives every 25 seconds. Important to keep NAT tables alive. persistentKeepalive = 25; @@ -30,6 +30,6 @@ }; networking.hosts = { - "10.100.0.1" = [ "fox" ]; + "10.106.0.1" = [ "fox" ]; }; } diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index d10703d..8299d48 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -12,7 +12,7 @@ # "wg0" is the network interface name. You can name the interface arbitrarily. wg0 = { # Determines the IP address and subnet of the server's end of the tunnel interface. - ips = [ "10.100.0.1/24" ]; + ips = [ "10.106.0.1/24" ]; # The port that WireGuard listens to. Must be accessible by the client. 
listenPort = 666; @@ -27,20 +27,20 @@ name = "Apex"; publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. - allowedIPs = [ "10.100.0.30/32" ]; + allowedIPs = [ "10.106.0.30/32" ]; } ]; }; }; networking.hosts = { - "10.100.0.30" = [ "apex" ]; + "10.106.0.30" = [ "apex" ]; }; networking.firewall = { extraCommands = '' # Accept slurm connections to slurmd from apex (via wireguard) - iptables -A nixos-fw -p tcp -i wg0 -s 10.100.0.30/32 -d 10.100.0.1/32 --dport 6818 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept ''; }; } diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix index 449ba8c..25fe4f6 100644 --- a/m/module/slurm-server.nix +++ b/m/module/slurm-server.nix @@ -15,9 +15,9 @@ iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept # Accept slurm connections to controller from fox (via wireguard) - iptables -A nixos-fw -p tcp -i wg0 -s 10.100.0.1/32 --dport 6817 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept # Accept slurm connections from fox for srun (via wireguard) - iptables -A nixos-fw -p tcp -i wg0 -s 10.100.0.1/32 --dport 60000:61000 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept ''; }; } -- 2.49.0 From 19c7e326780f7d588aecbd69bb9ccb4bda37746c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 3 Sep 2025 13:16:27 +0200 Subject: [PATCH 437/472] Allow access to NFS via wireguard subnet Reviewed-by: Aleix Roca Nonell --- m/apex/nfs.nix | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix index b1668c1..8334d50 100644 --- a/m/apex/nfs.nix +++ b/m/apex/nfs.nix @@ -8,6 +8,7 @@ statdPort = 4000; exports = '' /home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash) + /home 
10.106.0.0/24(rw,async,no_subtree_check,no_root_squash) ''; }; networking.firewall = { @@ -27,6 +28,21 @@ iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept + + # Accept NFS traffic from wg0 + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept + # Same but UDP + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept ''; }; } -- 2.49.0 From ff0fc18d0ae6697d6c3c19ffbe9aeed8b2e3d795 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 3 Sep 2025 13:24:06 +0200 Subject: [PATCH 438/472] Mount home via NFS from apex in fox Reviewed-by: Aleix Roca Nonell --- m/fox/configuration.nix | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index fc35e0d..2d5f00e 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -79,6 +79,13 @@ fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; 
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; }; + # Mount the NFS home + fileSystems."/nfs/home" = { + device = "10.106.0.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; + # Make a /nvme{0,1}/$USER directory for each user. systemd.services.create-nvme-dirs = let # Take only normal users in fox -- 2.49.0 From 084d556c56cc1a8f884f08d51463b058b4e90fa7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Jun 2025 14:55:43 +0200 Subject: [PATCH 439/472] Add AMD uProf package and driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- pkgs/amd-uprof/default.nix | 41 ++++++++++++++++++++++ pkgs/amd-uprof/driver.nix | 33 ++++++++++++++++++ pkgs/amd-uprof/makefile.patch | 66 +++++++++++++++++++++++++++++++++++ pkgs/overlay.nix | 11 ++++++ 4 files changed, 151 insertions(+) create mode 100644 pkgs/amd-uprof/default.nix create mode 100644 pkgs/amd-uprof/driver.nix create mode 100644 pkgs/amd-uprof/makefile.patch diff --git a/pkgs/amd-uprof/default.nix b/pkgs/amd-uprof/default.nix new file mode 100644 index 0000000..08633ca --- /dev/null +++ b/pkgs/amd-uprof/default.nix @@ -0,0 +1,41 @@ +{ stdenv +, lib +, curl +, cacert +, runCommandLocal +}: + +let + version = "5.1.701"; + tarball = "AMDuProf_Linux_x64_${version}.tar.bz2"; + + uprofSrc = runCommandLocal tarball { + nativeBuildInputs = [ curl ]; + outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk="; + SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"; + } '' + curl \ + -o $out \ + 'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \ + -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \ + -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \ + -H 'Accept-Language: en-US,en;q=0.5' \ + -H 'Accept-Encoding: gzip, deflate, br, zstd' \ + -H 'Referer: 
https://www.amd.com/' 2>&1 | tr '\r' '\n' + ''; + +in + stdenv.mkDerivation { + pname = "AMD-uProf"; + inherit version; + src = uprofSrc; + dontStrip = true; + phases = [ "installPhase" "fixupPhase" ]; + installPhase = '' + set -x + mkdir -p $out + tar -x -v -C $out --strip-components=1 -f $src + rm $out/bin/AMDPowerProfilerDriverSource.tar.gz + set +x + ''; + } diff --git a/pkgs/amd-uprof/driver.nix b/pkgs/amd-uprof/driver.nix new file mode 100644 index 0000000..7fdc16f --- /dev/null +++ b/pkgs/amd-uprof/driver.nix @@ -0,0 +1,33 @@ +{ stdenv +, lib +, amd-uprof +, kernel +, runCommandLocal +}: + +let + version = amd-uprof.version; + tarball = amd-uprof.src; +in stdenv.mkDerivation { + pname = "AMDPowerProfilerDriver"; + inherit version; + src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } '' + set -x + tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz + mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out + set +x + ''; + hardeningDisable = [ "pic" "format" ]; + nativeBuildInputs = kernel.moduleBuildDependencies; + patches = [ ./makefile.patch ]; + makeFlags = [ + "KERNEL_VERSION=${kernel.modDirVersion}" + "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" + "INSTALL_MOD_PATH=$(out)" + ]; + meta = { + description = "AMD Power Profiler Driver"; + homepage = "https://www.amd.com/es/developer/uprof.html"; + platforms = lib.platforms.linux; + }; +} diff --git a/pkgs/amd-uprof/makefile.patch b/pkgs/amd-uprof/makefile.patch new file mode 100644 index 0000000..d1e5642 --- /dev/null +++ b/pkgs/amd-uprof/makefile.patch @@ -0,0 +1,66 @@ +--- a/Makefile 2025-06-19 20:36:49.346693267 +0200 ++++ b/Makefile 2025-06-19 20:42:29.778088660 +0200 +@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi + MODULE_NAME_KO=$(MODULE_NAME).ko + + # check is module inserted +-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME)) ++#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME)) + + # 
check pcore dkms status + PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION)) +@@ -50,7 +50,7 @@ endif + # “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00 + G_VERSION=9 + K_VERSION=5 +-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.) ++KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.) + GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.) + ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION)))) + ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION)))) +@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe + + # make + all: +- @chmod a+x ./AMDPPcert.sh +- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status; +- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS1 -eq 1 ]; then \ +- exit 1; \ +- fi +- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules +- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS3 -eq 0 ]; then \ +- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \ +- fi ++ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules + + # make clean + clean: +@@ -84,23 +74,9 @@ clean: + + # make install + install: +- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra +- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO) +- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/ +- @depmod -a +- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \ +- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\ +- rmmod $(MODULE_NAME);\ +- fi +- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \ +- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \ +- echo $$? 
> $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS1 -eq 0 ]; then \ +- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \ +- exit 1; \ +- else \ +- cat $(PWD)/sign_status1; \ +- fi ++ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/ ++ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/ ++ + # make dkms + dkms: + @chmod a+x ./AMDPPcert.sh diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 9d3c960..4198cd3 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -53,4 +53,15 @@ final: prev: meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; cudainfo = prev.callPackage ./cudainfo/default.nix { }; + + amd-uprof = prev.callPackage ./amd-uprof/default.nix { }; + + # FIXME: Extend this to all linuxPackages variants. Open problem, see: + # https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636 + linuxPackages = prev.linuxPackages.extend (_final: _prev: { + amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { }; + }); + linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: { + amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { }; + }); } -- 2.49.0 From e6e4846529960c9b9858edc5b8edc78b80cc1135 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 20 Jun 2025 15:51:46 +0200 Subject: [PATCH 440/472] Add AMD uProf module and enable it in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 3 +++ m/module/amd-uprof.nix | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 m/module/amd-uprof.nix diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 2d5f00e..85af518 100644 --- 
a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/base.nix ../common/xeon/console.nix + ../module/amd-uprof.nix ../module/emulation.nix ../module/nvidia.nix ../module/slurm-client.nix @@ -29,6 +30,8 @@ # Use performance for benchmarks powerManagement.cpuFreqGovernor = "performance"; + services.amd-uprof.enable = true; + # Disable NUMA balancing boot.kernel.sysctl."kernel.numa_balancing" = 0; diff --git a/m/module/amd-uprof.nix b/m/module/amd-uprof.nix new file mode 100644 index 0000000..7d20a6f --- /dev/null +++ b/m/module/amd-uprof.nix @@ -0,0 +1,49 @@ +{ config, lib, pkgs, ... }: + +{ + options = { + services.amd-uprof = { + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Whether to enable AMD uProf."; + }; + }; + }; + + # Only setup amd-uprof if enabled + config = lib.mkIf config.services.amd-uprof.enable { + + # First make sure that we add the module to the list of available modules + # in the kernel matching the same kernel version of this configuration. + boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ]; + boot.kernelModules = [ "AMDPowerProfiler" ]; + + # Make the userspace tools available in $PATH. + environment.systemPackages = with pkgs; [ amd-uprof ]; + + # The AMDPowerProfiler module doesn't create the /dev device nor it emits + # any uevents, so we cannot use udev rules to automatically create the + # device. Instead, we run a systemd unit that does it after loading the + # modules. 
+ systemd.services.amd-uprof-device = { + description = "Create /dev/AMDPowerProfiler device"; + after = [ "systemd-modules-load.service" ]; + wantedBy = [ "multi-user.target" ]; + unitConfig.ConditionPathExists = [ + "/proc/AMDPowerProfiler/device" + "!/dev/AMDPowerProfiler" + ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" '' + mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0 + ''; + ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" '' + rm -f /dev/AMDPowerProfiler + ''; + }; + }; + }; +} -- 2.49.0 From cf1db201b2849ffbbba9cbfeea6d678781fc6fac Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 4 Sep 2025 12:00:33 +0200 Subject: [PATCH 441/472] Use CFLAGS_MODULE instead of EXTRA_CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the build in Linux 6.15.6, as it was not able to find the include files. Reviewed-by: Aleix Boné --- pkgs/amd-uprof/makefile.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/amd-uprof/makefile.patch b/pkgs/amd-uprof/makefile.patch index d1e5642..7e36cee 100644 --- a/pkgs/amd-uprof/makefile.patch +++ b/pkgs/amd-uprof/makefile.patch @@ -33,7 +33,7 @@ - if [ $$SIGSTATUS3 -eq 0 ]; then \ - ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \ - fi -+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules ++ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules # make clean clean: -- 2.49.0 From f7d676de772511a8cbba4ce93fd01671895f3f97 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 4 Sep 2025 12:20:42 +0200 Subject: [PATCH 442/472] Fix hrtimer new interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hrtimer_init() is now done via hrtimer_setup() with the callback function as argument. 
See: https://lwn.net/Articles/996598/ Reviewed-by: Aleix Boné --- pkgs/amd-uprof/driver.nix | 2 +- pkgs/amd-uprof/hrtimer.patch | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 pkgs/amd-uprof/hrtimer.patch diff --git a/pkgs/amd-uprof/driver.nix b/pkgs/amd-uprof/driver.nix index 7fdc16f..e69ef2c 100644 --- a/pkgs/amd-uprof/driver.nix +++ b/pkgs/amd-uprof/driver.nix @@ -19,7 +19,7 @@ in stdenv.mkDerivation { ''; hardeningDisable = [ "pic" "format" ]; nativeBuildInputs = kernel.moduleBuildDependencies; - patches = [ ./makefile.patch ]; + patches = [ ./makefile.patch ./hrtimer.patch ]; makeFlags = [ "KERNEL_VERSION=${kernel.modDirVersion}" "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" diff --git a/pkgs/amd-uprof/hrtimer.patch b/pkgs/amd-uprof/hrtimer.patch new file mode 100644 index 0000000..24befa3 --- /dev/null +++ b/pkgs/amd-uprof/hrtimer.patch @@ -0,0 +1,31 @@ +--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200 ++++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200 +@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo) + + DRVPRINT("pTimerConfig(%p)", pTimerConfig); + +- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); ++ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + } + + int PmcSetupTimer(ClientContext* pClientCtx) +@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient + { + /* Interval in ms */ + pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000); +- pTimerConfig->m_hrTimer.function = PmcTimerCallback; + + DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time); + } +--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200 ++++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200 +@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu) + pCoreClientData = &per_cpu(g_coreClientData, cpu); + + // initialize HR timer +- 
hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); +- pCoreClientData->m_hrTimer.function = &HrTimerCallback; ++ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + + return; + } // InitHrTimer -- 2.49.0 From ac0deb47b6d666ae36bb6ab86793337a4bf36fe7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 5 Sep 2025 13:01:11 +0200 Subject: [PATCH 443/472] Fix amd-uprof dependencies with patchelf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- pkgs/amd-uprof/default.nix | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pkgs/amd-uprof/default.nix b/pkgs/amd-uprof/default.nix index 08633ca..7d2fd86 100644 --- a/pkgs/amd-uprof/default.nix +++ b/pkgs/amd-uprof/default.nix @@ -3,6 +3,19 @@ , curl , cacert , runCommandLocal +, autoPatchelfHook +, elfutils +, glib +, libGL +, ncurses5 +, xorg +, zlib +, libxkbcommon +, freetype +, fontconfig +, libGLU +, dbus +, rocmPackages }: let @@ -31,11 +44,36 @@ in src = uprofSrc; dontStrip = true; phases = [ "installPhase" "fixupPhase" ]; + nativeBuildInputs = [ autoPatchelfHook ]; + buildInputs = [ + stdenv.cc.cc.lib + ncurses5 + elfutils + glib + libGL + libGLU + xorg.libX11 + xorg.libXext + xorg.libXi + xorg.libXmu + xorg.libxcb + xorg.xcbutilwm + xorg.xcbutilrenderutil + xorg.xcbutilkeysyms + xorg.xcbutilimage + fontconfig.lib + libxkbcommon + zlib + freetype + dbus + rocmPackages.rocprofiler + ]; installPhase = '' set -x mkdir -p $out tar -x -v -C $out --strip-components=1 -f $src rm $out/bin/AMDPowerProfilerDriverSource.tar.gz + patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so set +x ''; } -- 2.49.0 From d6126501babd9782be4248b2840da493241c30d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Sep 2025 15:53:28 +0200 Subject: [PATCH 444/472] Disable NMI 
watchdog in fox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 85af518..cce4fbe 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -38,6 +38,9 @@ # Expose kernel addresses boot.kernel.sysctl."kernel.kptr_restrict" = 0; + # Disable NMI watchdog to save one hw counter (for AMD uProf) + boot.kernel.sysctl."kernel.nmi_watchdog" = 0; + services.openssh.settings.X11Forwarding = true; services.fail2ban.enable = true; -- 2.49.0 From a6dfc267fd00aeeb3a63693656eaa5b20b4b32d9 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 16 Sep 2025 15:57:04 +0200 Subject: [PATCH 445/472] Fix hidden dependencies for AMDuProfSys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It tries to dlopen libcrypt.so.1 and libstdc++.so.6, so we make sure they are available by adding them to the runpath. 
Reviewed-by: Aleix Boné --- pkgs/amd-uprof/default.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkgs/amd-uprof/default.nix b/pkgs/amd-uprof/default.nix index 7d2fd86..f412ad2 100644 --- a/pkgs/amd-uprof/default.nix +++ b/pkgs/amd-uprof/default.nix @@ -16,6 +16,7 @@ , libGLU , dbus , rocmPackages +, libxcrypt-legacy }: let @@ -52,6 +53,7 @@ in glib libGL libGLU + libxcrypt-legacy xorg.libX11 xorg.libXext xorg.libXi @@ -74,6 +76,7 @@ in tar -x -v -C $out --strip-components=1 -f $src rm $out/bin/AMDPowerProfilerDriverSource.tar.gz patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so + patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys set +x ''; } -- 2.49.0 From 4da778047203b62af2eff4328b4f982695b0e44b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Sep 2025 11:44:49 +0200 Subject: [PATCH 446/472] Add amd_hsmp module in fox for AMD uProf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/fox/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index cce4fbe..3f6742b 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -22,7 +22,7 @@ swapDevices = lib.mkForce []; boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; - boot.kernelModules = [ "kvm-amd" "amd_uncore" ]; + boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ]; hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; hardware.cpu.intel.updateMicrocode = lib.mkForce false; -- 2.49.0 From 94cbfd38a6ae558709f6bd0544699ad7a710cede Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 18 Sep 2025 13:15:44 +0200 Subject: [PATCH 447/472] Fix AMDuProfPcm so it finds libnuma.so MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit We change the search procedure so it detects NixOS from /etc/os-release and uses "libnuma.so" when calling dlopen, instead of hardcoding a full path to /usr. The full path of libnuma is stored in the runpath, so dlopen can find it. Reviewed-by: Aleix Boné Tested-by: Vincent Arcila --- pkgs/amd-uprof/default.nix | 9 ++++++++- pkgs/amd-uprof/libnuma.r2 | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 pkgs/amd-uprof/libnuma.r2 diff --git a/pkgs/amd-uprof/default.nix b/pkgs/amd-uprof/default.nix index f412ad2..9eb6707 100644 --- a/pkgs/amd-uprof/default.nix +++ b/pkgs/amd-uprof/default.nix @@ -17,12 +17,15 @@ , dbus , rocmPackages , libxcrypt-legacy +, numactl +, radare2 }: let version = "5.1.701"; tarball = "AMDuProf_Linux_x64_${version}.tar.bz2"; + # NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes. uprofSrc = runCommandLocal tarball { nativeBuildInputs = [ curl ]; outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk="; @@ -45,7 +48,7 @@ in src = uprofSrc; dontStrip = true; phases = [ "installPhase" "fixupPhase" ]; - nativeBuildInputs = [ autoPatchelfHook ]; + nativeBuildInputs = [ autoPatchelfHook radare2 ]; buildInputs = [ stdenv.cc.cc.lib ncurses5 @@ -69,6 +72,7 @@ in freetype dbus rocmPackages.rocprofiler + numactl ]; installPhase = '' set -x @@ -77,6 +81,9 @@ in rm $out/bin/AMDPowerProfilerDriverSource.tar.gz patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys + echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c - + radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm + patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm set +x ''; } diff --git a/pkgs/amd-uprof/libnuma.r2 b/pkgs/amd-uprof/libnuma.r2 new file mode 100644 index 0000000..77cbabc --- /dev/null +++ b/pkgs/amd-uprof/libnuma.r2 @@ -0,0
+1,10 @@ +# Patch arguments to call sym std::string::find(char const*, unsigned long, unsigned long) +# so it matches NixOS: +# +# Change OS name to NixOS +wz NixOS @ 0x00550a43 +# And set the length to 5 characters +wa mov ecx, 5 @0x00517930 +# +# Then change the argument to dlopen() so it only uses libnuma.so +wz libnuma.so @ 0x00562940 -- 2.49.0 From 7d5aebf88276dc8e2a753719d06da23e93fc548e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 17 Sep 2025 13:08:48 +0200 Subject: [PATCH 448/472] Share a public folder for documents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/tent/nginx.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index de9214e..731b011 100644 --- a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -67,6 +67,9 @@ in location /p/ { alias /var/lib/p/; } + location /pub/ { + alias /vault/pub/; + } ''; }; }; -- 2.49.0 From 6afe05b5fdc50501a9ac7248c917209eadf268ef Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 19 Sep 2025 13:18:12 +0200 Subject: [PATCH 449/472] Use lowercase peer hostnames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/apex/wireguard.nix | 2 +- m/fox/wireguard.nix | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix index 49180a1..607564f 100644 --- a/m/apex/wireguard.nix +++ b/m/apex/wireguard.nix @@ -18,7 +18,7 @@ # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA= peers = [ { - name = "Fox"; + name = "fox"; publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; allowedIPs = [ "10.106.0.0/24" ]; endpoint = "fox.ac.upc.edu:666"; diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index 8299d48..d0b4349 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -24,7 +24,7 @@ peers = [ # List of allowed peers. 
{ - name = "Apex"; + name = "apex"; publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. allowedIPs = [ "10.106.0.30/32" ]; -- 2.49.0 From e98fdb89ab660fc7f6a7e4ee1ff26a8021f05cda Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 19 Sep 2025 13:20:54 +0200 Subject: [PATCH 450/472] Restrict fox peer to a single IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/apex/wireguard.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix index 607564f..0a6ac5f 100644 --- a/m/apex/wireguard.nix +++ b/m/apex/wireguard.nix @@ -20,7 +20,7 @@ { name = "fox"; publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; - allowedIPs = [ "10.106.0.0/24" ]; + allowedIPs = [ "10.106.0.1/32" ]; endpoint = "fox.ac.upc.edu:666"; # Send keepalives every 25 seconds. Important to keep NAT tables alive. 
persistentKeepalive = 25; -- 2.49.0 From d49d078bed894d0cb12d94a582746c0a85e0503e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 19 Sep 2025 13:26:56 +0200 Subject: [PATCH 451/472] Add raccoon host key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- keys.nix | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/keys.nix b/keys.nix index b26f11a..d491d6d 100644 --- a/keys.nix +++ b/keys.nix @@ -2,21 +2,22 @@ # here all the public keys rec { hosts = { - hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; - owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; - owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; - eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; - koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; - bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; - lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; - fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; - tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; - apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; - weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel"; + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; + owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; + owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; + eudy = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; + koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; + bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; + lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; + tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; + apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; + weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel"; + raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon"; }; hostGroup = with hosts; rec { - compute = [ owl1 owl2 fox ]; + compute = [ owl1 owl2 fox raccoon ]; playground = [ eudy koro weasel ]; storage = [ bay lake2 ]; monitor = [ hut ]; -- 2.49.0 From 1f0cb4ae766984d11d3fc2a1a2f2d4bb12fd3604 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 25 Sep 2025 15:01:33 +0200 Subject: [PATCH 452/472] Add raccoon peer to wireguard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It routes traffic from fox, apex and the compute nodes so that we can reach the git servers and tent. 
Reviewed-by: Aleix Boné --- m/apex/wireguard.nix | 7 +++++ m/common/base/net.nix | 1 - m/common/ssf.nix | 1 + m/common/ssf/hosts-remote.nix | 9 +++++++ m/fox/wireguard.nix | 7 +++++ m/raccoon/configuration.nix | 1 + m/raccoon/wireguard.nix | 48 ++++++++++++++++++++++++++++++++++ m/tent/configuration.nix | 1 + secrets/secrets.nix | 2 ++ secrets/wg-raccoon.age | Bin 0 -> 697 bytes 10 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 m/common/ssf/hosts-remote.nix create mode 100644 m/raccoon/wireguard.nix create mode 100644 secrets/wg-raccoon.age diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix index 0a6ac5f..4721d2d 100644 --- a/m/apex/wireguard.nix +++ b/m/apex/wireguard.nix @@ -25,11 +25,18 @@ # Send keepalives every 25 seconds. Important to keep NAT tables alive. persistentKeepalive = 25; } + { + name = "raccoon"; + publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI="; + allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ]; + } ]; }; }; networking.hosts = { "10.106.0.1" = [ "fox" ]; + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; }; } diff --git a/m/common/base/net.nix b/m/common/base/net.nix index 64e6160..88c8c00 100644 --- a/m/common/base/net.nix +++ b/m/common/base/net.nix @@ -15,7 +15,6 @@ hosts = { "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; - "84.88.51.152" = [ "raccoon" ]; "84.88.51.142" = [ "raccoon-ipmi" ]; }; }; diff --git a/m/common/ssf.nix b/m/common/ssf.nix index 8e8dc6b..98a98e5 100644 --- a/m/common/ssf.nix +++ b/m/common/ssf.nix @@ -4,6 +4,7 @@ ./xeon.nix ./ssf/fs.nix ./ssf/hosts.nix + ./ssf/hosts-remote.nix ./ssf/net.nix ./ssf/ssh.nix ]; diff --git a/m/common/ssf/hosts-remote.nix b/m/common/ssf/hosts-remote.nix new file mode 100644 index 0000000..1660f73 --- /dev/null +++ b/m/common/ssf/hosts-remote.nix @@ -0,0 +1,9 @@ +{ pkgs, ... 
}: + +{ + networking.hosts = { + # Remote hosts visible from compute nodes + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; + }; +} diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index d0b4349..7a55c2e 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -29,12 +29,19 @@ # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. allowedIPs = [ "10.106.0.30/32" ]; } + { + name = "raccoon"; + publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI="; + allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ]; + } ]; }; }; networking.hosts = { "10.106.0.30" = [ "apex" ]; + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; }; networking.firewall = { diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index cec17e6..7a608e2 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -8,6 +8,7 @@ ../module/ssh-hut-extern.nix ../module/nvidia.nix ../eudy/kernel/perf.nix + ./wireguard.nix ]; # Don't install Grub on the disk yet diff --git a/m/raccoon/wireguard.nix b/m/raccoon/wireguard.nix new file mode 100644 index 0000000..daf4883 --- /dev/null +++ b/m/raccoon/wireguard.nix @@ -0,0 +1,48 @@ +{ config, pkgs, ... 
}: + +{ + networking.nat = { + enable = true; + enableIPv6 = false; + externalInterface = "eno0"; + internalInterfaces = [ "wg0" ]; + }; + + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age; + + # Enable WireGuard + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + wg0 = { + ips = [ "10.106.0.236/24" ]; + listenPort = 666; + privateKeyFile = config.age.secrets.wgRaccoon.path; + # Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI= + peers = [ + { + name = "fox"; + publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; + allowedIPs = [ "10.106.0.1/32" ]; + endpoint = "fox.ac.upc.edu:666"; + persistentKeepalive = 25; + } + { + name = "apex"; + publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; + allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ]; + endpoint = "ssfhead.bsc.es:666"; + persistentKeepalive = 25; + } + ]; + }; + }; + + networking.hosts = { + "10.106.0.1" = [ "fox.wg" ]; + "10.106.0.30" = [ "apex.wg" ]; + }; +} diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 416d8df..9d56ba7 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -35,6 +35,7 @@ defaultGateway = "10.0.44.1"; hosts = { "84.88.53.236" = [ "apex" ]; + "10.0.44.1" = [ "raccoon" ]; }; }; diff --git a/secrets/secrets.nix b/secrets/secrets.nix index 9673249..920d52d 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -4,6 +4,7 @@ let hut = [ keys.hosts.hut ] ++ adminsKeys; fox = [ keys.hosts.fox ] ++ adminsKeys; apex = [ keys.hosts.apex ] ++ adminsKeys; + raccoon = [ keys.hosts.raccoon ] ++ adminsKeys; mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; tent = [ keys.hosts.tent ] ++ adminsKeys; # Only expose ceph keys to safe nodes and admins @@ -29,4 +30,5 @@ in "wg-fox.age".publicKeys = fox; "wg-apex.age".publicKeys = apex; + "wg-raccoon.age".publicKeys = raccoon; } diff --git a/secrets/wg-raccoon.age b/secrets/wg-raccoon.age new 
file mode 100644 index 0000000000000000000000000000000000000000..f32a2aa395b4fcaf3bdd9cf09e9164d3a85aad62 GIT binary patch literal 697 zcmYdHPt{G$OD?J`D9Oyv)5|YP*Do{V(zR14F3!+RO))YxHMCR+k2J4zFIR|6%-1f> zv~&&3NvXq zasr)vT*}dH^9^=0PDxiN@bs!Q)HgHBHIFa~^A9mF&P(%34GD+}Eet9v_Y4ltD+@Mr zb`J}33U%djF0TwU^SAITuME%eu88#T^UY7qa7@qf4Jh|4s>lrqH_fx~4@&e7^~vYb z)zwvqsL1j)cPk7iGxA8z$cPNfH8R)s@yjgD^$066vh)nfa5nVL&oinjuJq)3z`xP` zd5>n%_lLzsEWg+^*k+4KKA2?7U}x?pD Date: Thu, 25 Sep 2025 15:15:43 +0200 Subject: [PATCH 453/472] Remove extra SSH jump configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have direct visibility among nodes so we don't need any extra SSH configuration to reach them. Reviewed-by: Aleix Boné --- m/apex/configuration.nix | 11 ----------- m/common/base/net.nix | 2 ++ m/common/ssf.nix | 1 - m/common/ssf/ssh.nix | 16 ---------------- m/fox/configuration.nix | 10 ---------- m/module/ssh-hut-extern.nix | 8 -------- m/raccoon/configuration.nix | 2 +- m/tent/configuration.nix | 2 +- 8 files changed, 4 insertions(+), 48 deletions(-) delete mode 100644 m/common/ssf/ssh.nix delete mode 100644 m/module/ssh-hut-extern.nix diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 17828a3..82d3842 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -56,17 +56,6 @@ }; }; - # Use SSH tunnel to reach internal hosts - programs.ssh.extraConfig = '' - Host bscpm04.bsc.es gitlab-internal.bsc.es knights3.bsc.es - ProxyCommand nc -X connect -x localhost:23080 %h %p - Host raccoon - HostName knights3.bsc.es - ProxyCommand nc -X connect -x localhost:23080 %h %p - Host tent - ProxyJump raccoon - ''; - networking.firewall = { extraCommands = '' # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our diff --git a/m/common/base/net.nix b/m/common/base/net.nix index 88c8c00..9fe6c4d 100644 --- a/m/common/base/net.nix +++ b/m/common/base/net.nix @@ -16,6 +16,8 @@ hosts = { "84.88.53.236" = 
[ "ssfhead.bsc.es" "ssfhead" ]; "84.88.51.142" = [ "raccoon-ipmi" ]; + "192.168.11.12" = [ "bscpm04.bsc.es" ]; + "192.168.11.15" = [ "gitlab-internal.bsc.es" ]; }; }; } diff --git a/m/common/ssf.nix b/m/common/ssf.nix index 98a98e5..ef74da3 100644 --- a/m/common/ssf.nix +++ b/m/common/ssf.nix @@ -6,6 +6,5 @@ ./ssf/hosts.nix ./ssf/hosts-remote.nix ./ssf/net.nix - ./ssf/ssh.nix ]; } diff --git a/m/common/ssf/ssh.nix b/m/common/ssf/ssh.nix deleted file mode 100644 index b73abd7..0000000 --- a/m/common/ssf/ssh.nix +++ /dev/null @@ -1,16 +0,0 @@ -{ - # Use SSH tunnel to apex to reach internal hosts - programs.ssh.extraConfig = '' - Host tent - ProxyJump raccoon - - # Access raccoon via the HTTP proxy - Host raccoon knights3.bsc.es - HostName knights3.bsc.es - ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p' - - # Make sure we can reach gitlab even if we don't have SSH access to raccoon - Host bscpm04.bsc.es gitlab-internal.bsc.es - ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p' - ''; -} diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index 3f6742b..e890ba5 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -45,16 +45,6 @@ services.fail2ban.enable = true; - # Use SSH tunnel to reach internal hosts - programs.ssh.extraConfig = '' - Host bscpm04.bsc.es gitlab-internal.bsc.es tent - ProxyJump raccoon - Host raccoon - ProxyJump apex - HostName 127.0.0.1 - Port 22022 - ''; - networking = { timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; hostName = "fox"; diff --git a/m/module/ssh-hut-extern.nix b/m/module/ssh-hut-extern.nix deleted file mode 100644 index cc6b26a..0000000 --- a/m/module/ssh-hut-extern.nix +++ /dev/null @@ -1,8 +0,0 @@ -{ - programs.ssh.extraConfig = '' - Host apex ssfhead - HostName ssflogin.bsc.es - Host hut - ProxyJump apex - ''; -} diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index 7a608e2..b68b519 100644 --- a/m/raccoon/configuration.nix +++ 
b/m/raccoon/configuration.nix @@ -3,9 +3,9 @@ { imports = [ ../common/base.nix + ../common/ssf/hosts.nix ../module/emulation.nix ../module/debuginfod.nix - ../module/ssh-hut-extern.nix ../module/nvidia.nix ../eudy/kernel/perf.nix ./wireguard.nix diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 9d56ba7..79c9f45 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -3,9 +3,9 @@ { imports = [ ../common/xeon.nix + ../common/ssf/hosts.nix ../module/emulation.nix ../module/debuginfod.nix - ../module/ssh-hut-extern.nix ./monitoring.nix ./nginx.nix ./nix-serve.nix -- 2.49.0 From e1c950a530ed51b00bac3900ee200a2d2d0685f3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 19 Sep 2025 13:48:50 +0200 Subject: [PATCH 454/472] Mount apex /home via NFS in raccoon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/raccoon/configuration.nix | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index b68b519..b2c59fe 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -44,6 +44,13 @@ }; }; + # Mount the NFS home + fileSystems."/nfs/home" = { + device = "10.106.0.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; + nix.settings = { extra-substituters = [ "https://jungle.bsc.es/cache" ]; extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; -- 2.49.0 From ad1544759fc5c8039d4a045986f4d7b0f187d725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Mon, 29 Sep 2025 17:30:02 +0200 Subject: [PATCH 455/472] Remove machine access for user csiringo Reviewed-by: Rodrigo Arias Mallo --- m/common/base/users.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 8aace8d..651f4b6 100644 --- a/m/common/base/users.nix 
+++ b/m/common/base/users.nix @@ -162,7 +162,7 @@ home = "/home/Computational/csiringo"; description = "Cesare Siringo"; group = "Computational"; - hosts = [ "apex" "weasel" ]; + hosts = [ ]; hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1"; openssh.authorizedKeys.keys = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" -- 2.49.0 From 5fcd57a061a725754ede77740610e0ec25e398a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Thu, 25 Sep 2025 17:10:10 +0200 Subject: [PATCH 456/472] Use hut substituter in all nodes Reviewed-by: Rodrigo Arias Mallo --- m/apex/configuration.nix | 7 +------ m/bay/configuration.nix | 1 + m/eudy/configuration.nix | 1 + m/fox/configuration.nix | 7 +------ m/lake2/configuration.nix | 1 + m/raccoon/configuration.nix | 6 +----- m/tent/configuration.nix | 1 + m/weasel/configuration.nix | 1 + 8 files changed, 8 insertions(+), 17 deletions(-) diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix index 82d3842..6b1073b 100644 --- a/m/apex/configuration.nix +++ b/m/apex/configuration.nix @@ -5,6 +5,7 @@ ../common/xeon.nix ../common/ssf/hosts.nix ../module/ceph.nix + ../module/hut-substituter.nix ../module/slurm-server.nix ./nfs.nix ./wireguard.nix @@ -65,10 +66,4 @@ iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse ''; }; - - # Use tent for cache - nix.settings = { - extra-substituters = [ "https://jungle.bsc.es/cache" ]; - extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; - }; } diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 5c6f93c..47e8264 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/ssf.nix + ../module/hut-substituter.nix ../module/monitoring.nix ]; diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix index d676135..ddb894e 100644 --- 
a/m/eudy/configuration.nix +++ b/m/eudy/configuration.nix @@ -9,6 +9,7 @@ ./cpufreq.nix ./fs.nix ./users.nix + ../module/hut-substituter.nix ../module/debuginfod.nix ]; diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix index e890ba5..8c381f8 100644 --- a/m/fox/configuration.nix +++ b/m/fox/configuration.nix @@ -8,6 +8,7 @@ ../module/emulation.nix ../module/nvidia.nix ../module/slurm-client.nix + ../module/hut-substituter.nix ./wireguard.nix ]; @@ -62,12 +63,6 @@ interfaces.enp1s0f0np0.useDHCP = true; }; - # Use hut for cache - nix.settings = { - extra-substituters = [ "https://jungle.bsc.es/cache" ]; - extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; - }; - # Recommended for new graphics cards hardware.nvidia.open = true; diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix index e2f350d..a67e5ae 100644 --- a/m/lake2/configuration.nix +++ b/m/lake2/configuration.nix @@ -4,6 +4,7 @@ imports = [ ../common/ssf.nix ../module/monitoring.nix + ../module/hut-substituter.nix ]; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix index b2c59fe..38ce719 100644 --- a/m/raccoon/configuration.nix +++ b/m/raccoon/configuration.nix @@ -9,6 +9,7 @@ ../module/nvidia.nix ../eudy/kernel/perf.nix ./wireguard.nix + ../module/hut-substituter.nix ]; # Don't install Grub on the disk yet @@ -51,11 +52,6 @@ options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; }; - nix.settings = { - extra-substituters = [ "https://jungle.bsc.es/cache" ]; - extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; - }; - # Enable performance governor powerManagement.cpuFreqGovernor = "performance"; diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix index 79c9f45..a165b6b 100644 --- a/m/tent/configuration.nix +++ b/m/tent/configuration.nix @@ -15,6 +15,7 @@ ../hut/msmtp.nix ../module/p.nix 
../module/vpn-dac.nix + ../module/hut-substituter.nix ]; # Select the this using the ID to avoid mismatches diff --git a/m/weasel/configuration.nix b/m/weasel/configuration.nix index 43ee735..995d0fd 100644 --- a/m/weasel/configuration.nix +++ b/m/weasel/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/ssf.nix + ../module/hut-substituter.nix ]; # Select this using the ID to avoid mismatches -- 2.49.0 From 0668f0db74ade07f6243744b5ae7c4e7a87468f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Mon, 29 Sep 2025 09:41:34 +0200 Subject: [PATCH 457/472] Lower connect timeout when using hut substituter Reviewed-by: Rodrigo Arias Mallo --- m/module/hut-substituter.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/m/module/hut-substituter.nix b/m/module/hut-substituter.nix index 8d27a45..92fda0b 100644 --- a/m/module/hut-substituter.nix +++ b/m/module/hut-substituter.nix @@ -6,5 +6,8 @@ { extra-substituters = [ "http://hut/cache" ]; extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + + # Set a low timeout in case hut is down + connect-timeout = 3; # seconds }; } -- 2.49.0 From f69629d2da0e46dcd64e70fa7454277bf8aea3f8 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Mon, 29 Sep 2025 19:17:33 +0200 Subject: [PATCH 458/472] Restart slurmd on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A failure to reach the control node can cause slurmd to fail and the unit remains in the failed state until it is manually restarted. 
Instead, try to restart the service every 30 seconds, forever: owl1% systemctl show slurmd | grep -E 'Restart=|RestartUSec=' Restart=on-failure RestartUSec=30s owl1% pgrep slurmd 5903 owl1% sudo kill -SEGV 5903 owl1% pgrep slurmd 6137 Fixes: https://jungle.bsc.es/git/rarias/jungle/issues/177 Reviewed-by: Aleix Boné --- m/module/slurm-client.nix | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix index 84ba4c7..deec844 100644 --- a/m/module/slurm-client.nix +++ b/m/module/slurm-client.nix @@ -12,6 +12,12 @@ # https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 KillMode = lib.mkForce "control-group"; + + # If slurmd fails to contact the control server it will fail, causing the + # node to remain out of service until manually restarted. Always try to + # restart it. + Restart = "always"; + RestartSec = "30s"; }; services.slurm.client.enable = true; -- 2.49.0 From b040bebd1d0981b8fb6967ef33cb1d4d5eef318b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 30 Sep 2025 18:26:33 +0200 Subject: [PATCH 459/472] Add acinca user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/common/base/users.nix | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/m/common/base/users.nix b/m/common/base/users.nix index 651f4b6..6717fef 100644 --- a/m/common/base/users.nix +++ b/m/common/base/users.nix @@ -156,7 +156,6 @@ }; csiringo = { - # Arbitrary UID but large so it doesn't collide with other users on ssfhead. 
uid = 9653; isNormalUser = true; home = "/home/Computational/csiringo"; @@ -168,6 +167,19 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" ]; }; + + acinca = { + uid = 9654; + isNormalUser = true; + home = "/home/Computational/acinca"; + description = "Arnau Cinca"; + group = "Computational"; + hosts = [ "apex" "hut" "fox" "owl1" "owl2" ]; + hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc" + ]; + }; }; groups = { -- 2.49.0 From ee6f9810060e7f5a44901e5413dbaa54a9f23195 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 2 Oct 2025 13:06:37 +0200 Subject: [PATCH 460/472] Add script to trim the repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- doc/trim.sh | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 doc/trim.sh diff --git a/doc/trim.sh b/doc/trim.sh new file mode 100755 index 0000000..4ae5368 --- /dev/null +++ b/doc/trim.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +# Trims the jungle repository by moving the website to its own repository and +# removing it from jungle. It also removes big pdf files and kernel +# configurations so the jungle repository is small. 
+ +set -e + +if [ -e oldjungle -o -e newjungle -o -e website ]; then + echo "remove oldjungle/, newjungle/ and website/ first" + exit 1 +fi + +# Clone the old jungle repo +git clone gitea@tent:rarias/jungle.git oldjungle + +# First split the website into a new repository +mkdir website && git -C website init -b master +git-filter-repo \ + --path web \ + --subdirectory-filter web \ + --source oldjungle \ + --target website + +# Then remove the website, pdf files and big kernel configs +mkdir newjungle && git -C newjungle init -b master +git-filter-repo \ + --invert-paths \ + --path web \ + --path-glob 'doc*.pdf' \ + --path-glob '**/kernel/configs/lockdep' \ + --path-glob '**/kernel/configs/defconfig' \ + --source oldjungle \ + --target newjungle + +set -x + +du -sh oldjungle newjungle website +# 57M oldjungle +# 2,3M newjungle +# 6,4M website + +du -sh --exclude=.git oldjungle newjungle website +# 30M oldjungle +# 700K newjungle +# 3,5M website -- 2.49.0 From f3bfe89f275384a5def14c9fd229129416218ba2 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Thu, 2 Oct 2025 13:28:07 +0200 Subject: [PATCH 461/472] Fetch website from its own git repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- m/hut/nginx.nix | 7 +++++-- m/tent/nginx.nix | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix index 97afc3a..f38d587 100644 --- a/m/hut/nginx.nix +++ b/m/hut/nginx.nix @@ -2,10 +2,13 @@ let website = pkgs.stdenv.mkDerivation { name = "jungle-web"; - src = theFlake; + src = pkgs.fetchgit { + url = "https://jungle.bsc.es/git/rarias/jungle-website.git"; + rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1"; + hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4="; + }; buildInputs = [ pkgs.hugo ]; buildPhase = '' - cd web rm -rf public/ hugo ''; diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix index 731b011..4568690 100644 --- 
a/m/tent/nginx.nix +++ b/m/tent/nginx.nix @@ -2,10 +2,13 @@ let website = pkgs.stdenv.mkDerivation { name = "jungle-web"; - src = theFlake; + src = pkgs.fetchgit { + url = "https://jungle.bsc.es/git/rarias/jungle-website.git"; + rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1"; + hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4="; + }; buildInputs = [ pkgs.hugo ]; buildPhase = '' - cd web rm -rf public/ hugo ''; -- 2.49.0 From e42058f08bcf7c0606e92415fad590e7758f271a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 1 Oct 2025 16:48:47 +0200 Subject: [PATCH 462/472] Allow access to hut from fox Reviewed-by: Rodrigo Arias Mallo --- m/fox/wireguard.nix | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix index 7a55c2e..f93c3e4 100644 --- a/m/fox/wireguard.nix +++ b/m/fox/wireguard.nix @@ -23,11 +23,11 @@ peers = [ # List of allowed peers. - { + { name = "apex"; publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. 
- allowedIPs = [ "10.106.0.30/32" ]; + allowedIPs = [ "10.106.0.30/32" "10.0.40.7/32" ]; } { name = "raccoon"; @@ -40,6 +40,7 @@ networking.hosts = { "10.106.0.30" = [ "apex" ]; + "10.0.40.7" = [ "hut" ]; "10.106.0.236" = [ "raccoon" ]; "10.0.44.4" = [ "tent" ]; }; -- 2.49.0 From ae6b0ae161663d3c911d6dcabf61643146586fcd Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 3 Oct 2025 14:14:20 +0200 Subject: [PATCH 463/472] Move MPICH to pkgs/mpich and set as default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- overlay.nix | 2 ++ pkgs/mpich/default.nix | 78 +++++++++++++----------------------------- pkgs/overlay.nix | 30 ---------------- 3 files changed, 25 insertions(+), 85 deletions(-) diff --git a/overlay.nix b/overlay.nix index 4831c55..08f5190 100644 --- a/overlay.nix +++ b/overlay.nix @@ -20,6 +20,8 @@ let jemallocNanos6 = callPackage ./pkgs/nanos6/jemalloc.nix { }; lmbench = callPackage ./pkgs/lmbench/default.nix { }; mcxx = callPackage ./pkgs/mcxx/default.nix { }; + mpi = final.mpich; # Set MPICH as default + mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; }; nanos6 = callPackage ./pkgs/nanos6/default.nix { }; nanos6Debug = final.nanos6.override { enableDebug = true; }; nixtools = callPackage ./pkgs/nixtools/default.nix { }; diff --git a/pkgs/mpich/default.nix b/pkgs/mpich/default.nix index 36bee85..4b5307a 100644 --- a/pkgs/mpich/default.nix +++ b/pkgs/mpich/default.nix @@ -1,68 +1,36 @@ { stdenv , lib -, fetchurl -, perl -, gfortran -, openssh -, hwloc , libfabric -, enableDebug ? 
false +, mpich +, pmix +, gfortran +, symlinkJoin }: -with lib; - -stdenv.mkDerivation rec { - pname = "mpich"; - version = "3.3.2"; - - src = fetchurl { - url = "https://www.mpich.org/static/downloads/${version}/mpich-${version}.tar.gz"; - sha256 = "1farz5zfx4cd0c3a0wb9pgfypzw0xxql1j1294z1sxslga1ziyjb"; +let + # pmix comes with the libraries in .out and headers in .dev + pmixAll = symlinkJoin { + name = "pmix-all"; + paths = [ pmix.dev pmix.out ]; }; - +in mpich.overrideAttrs (old: { + buildInput = old.buildInputs ++ [ + libfabric + pmixAll + ]; configureFlags = [ "--enable-shared" "--enable-sharedlib" + "--with-pm=no" "--with-device=ch4:ofi" + "--with-pmi=pmix" + "--with-pmix=${pmixAll}" "--with-libfabric=${libfabric}" - ] - ++ optional enableDebug "--enable-g=dbg,log"; - - enableParallelBuilding = true; - - buildInputs = [ perl gfortran openssh hwloc libfabric ]; + "--enable-g=log" + ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ + "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 + "FCFLAGS=-fallow-argument-mismatch" + ]; hardeningDisable = [ "all" ]; - - # doCheck = true; # Fails - - preFixup = '' - # Ensure the default compilers are the ones mpich was built with - sed -i 's:CC="gcc":CC=${stdenv.cc}/bin/gcc:' $out/bin/mpicc - sed -i 's:CXX="g++":CXX=${stdenv.cc}/bin/g++:' $out/bin/mpicxx - sed -i 's:FC="gfortran":FC=${gfortran}/bin/gfortran:' $out/bin/mpifort - '' - + lib.optionalString (!stdenv.isDarwin) '' - # /tmp/nix-build... ends up in the RPATH, fix it manually - for entry in $out/bin/mpichversion $out/bin/mpivars; do - echo "fix rpath: $entry" - patchelf --set-rpath "$out/lib" $entry - done - ''; - - meta = with lib; { - description = "Implementation of the Message Passing Interface (MPI) standard"; - - longDescription = '' - MPICH is a high-performance and widely portable implementation of - the Message Passing Interface (MPI) standard (MPI-1, MPI-2 and MPI-3). 
- ''; - homepage = "http://www.mpich.org"; - license = { - url = "https://github.com/pmodels/mpich/blob/v${version}/COPYRIGHT"; - fullName = "MPICH license (permissive)"; - }; - maintainers = [ ]; - platforms = platforms.linux ++ platforms.darwin; - }; -} +}) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index 4198cd3..d3be38d 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -1,35 +1,5 @@ final: prev: { - # Set MPICH as default - mpi = final.mpich; - - # Configure the network for MPICH - mpich = with final; let - # pmix comes with the libraries in .out and headers in .dev - pmixAll = symlinkJoin { - name = "pmix-all"; - paths = [ pmix.dev pmix.out ]; - }; - in prev.mpich.overrideAttrs (old: { - buildInput = old.buildInputs ++ [ - libfabric - pmixAll - ]; - configureFlags = [ - "--enable-shared" - "--enable-sharedlib" - "--with-pm=no" - "--with-device=ch4:ofi" - "--with-pmi=pmix" - "--with-pmix=${pmixAll}" - "--with-libfabric=${libfabric}" - "--enable-g=log" - ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [ - "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300 - "FCFLAGS=-fallow-argument-mismatch" - ]; - }); - slurm = prev.slurm.overrideAttrs (old: { patches = (old.patches or []) ++ [ # See https://bugs.schedmd.com/show_bug.cgi?id=19324 -- 2.49.0 From 826d6a28efb5e7273a15466098682e351a965b7e Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 10:38:22 +0200 Subject: [PATCH 464/472] Move slurm to pkgs/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- pkgs/overlay.nix | 19 ---- pkgs/slurm/default.nix | 96 ++++----------------- pkgs/{ => slurm}/slurm-rank-expansion.patch | 0 3 files changed, 19 insertions(+), 96 deletions(-) rename pkgs/{ => slurm}/slurm-rank-expansion.patch (100%) diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix index d3be38d..43ff0cd 100644 --- a/pkgs/overlay.nix +++ b/pkgs/overlay.nix @@ -1,24 +1,5 @@ 
final: prev: { - slurm = prev.slurm.overrideAttrs (old: { - patches = (old.patches or []) ++ [ - # See https://bugs.schedmd.com/show_bug.cgi?id=19324 - ./slurm-rank-expansion.patch - ]; - # Install also the pam_slurm_adopt library to restrict users from accessing - # nodes with no job allocated. - postBuild = (old.postBuild or "") + '' - pushd contribs/pam_slurm_adopt - make "PAM_DIR=$out/lib/security" - popd - ''; - postInstall = (old.postInstall or "") + '' - pushd contribs/pam_slurm_adopt - make "PAM_DIR=$out/lib/security" install - popd - ''; - }); - prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; diff --git a/pkgs/slurm/default.nix b/pkgs/slurm/default.nix index fd7a43c..355ff4c 100644 --- a/pkgs/slurm/default.nix +++ b/pkgs/slurm/default.nix @@ -1,80 +1,22 @@ -{ stdenv, lib, fetchFromGitHub, pkg-config, libtool, curl -, python, munge, perl, pam, openssl -, ncurses, libmysqlclient, gtk2, lua, hwloc, numactl -, readline, freeipmi, libssh2, xorg -, pmix -# enable internal X11 support via libssh2 -, enableX11 ? true -}: +{ slurm }: -stdenv.mkDerivation rec { - name = "slurm-${version}"; - version = "17.11.9-2"; - - # N.B. We use github release tags instead of https://www.schedmd.com/downloads.php - # because the latter does not keep older releases. - src = fetchFromGitHub { - owner = "SchedMD"; - repo = "slurm"; - # The release tags use - instead of . 
- rev = "${builtins.replaceStrings ["."] ["-"] name}"; - sha256 = "1lq4ac6yjai6wh979dciw8v3d99zbd3w36rfh0vpncqm672fg1qy"; - }; - - outputs = [ "out" "dev" ]; - - prePatch = lib.optional enableX11 '' - substituteInPlace src/common/x11_util.c \ - --replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"' +slurm.overrideAttrs (old: { + patches = (old.patches or []) ++ [ + # See https://bugs.schedmd.com/show_bug.cgi?id=19324 + # Still unmerged as of 2025-10-03, another corpo-cancer. + ./slurm-rank-expansion.patch + ]; + # Install also the pam_slurm_adopt library to restrict users from accessing + # nodes with no job allocated. + # TODO: Review pam_slurm_adopt, I don't trust their code much. + postBuild = (old.postBuild or "") + '' + pushd contribs/pam_slurm_adopt + make "PAM_DIR=$out/lib/security" + popd ''; - - # nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode' - # https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es - # this doesn't fix tests completely at least makes slurmd to launch - hardeningDisable = [ "bindnow" ]; - - nativeBuildInputs = [ pkg-config libtool ]; - buildInputs = [ - curl python munge perl pam openssl - libmysqlclient ncurses gtk2 - lua hwloc numactl readline freeipmi - pmix - ] ++ lib.optionals enableX11 [ libssh2 xorg.xauth ]; - - configureFlags = with lib; - [ "--with-munge=${munge}" - "--with-ssl=${openssl.dev}" - "--with-hwloc=${hwloc.dev}" - "--with-freeipmi=${freeipmi}" - "--sysconfdir=/etc/slurm" - "--with-pmix=${pmix}" - ] ++ (optional (gtk2 == null) "--disable-gtktest") - ++ (optional enableX11 "--with-libssh2=${libssh2.dev}"); - - - preConfigure = '' - patchShebangs ./doc/html/shtml2html.py - patchShebangs ./doc/man/man2html.py - patchShebangs ./configure + postInstall = (old.postInstall or "") + '' + pushd contribs/pam_slurm_adopt + make "PAM_DIR=$out/lib/security" install + popd ''; - -# postBuild = '' -# pushd contrib/pmi2 -# make -j install -# popd -# ''; - - postInstall = '' - rm -f 
$out/lib/*.la $out/lib/slurm/*.la - ''; - - enableParallelBuilding = true; - - meta = with lib; { - homepage = http://www.schedmd.com/; - description = "Simple Linux Utility for Resource Management"; - platforms = platforms.linux; - license = licenses.gpl2; - maintainers = with maintainers; [ jagajaga markuskowa ]; - }; -} +}) diff --git a/pkgs/slurm-rank-expansion.patch b/pkgs/slurm/slurm-rank-expansion.patch similarity index 100% rename from pkgs/slurm-rank-expansion.patch rename to pkgs/slurm/slurm-rank-expansion.patch -- 2.49.0 From b94a1493d508752b467f93c491e79dbfde8299e6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 10:38:57 +0200 Subject: [PATCH 465/472] Merge flake.nix with bscpkgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 54 +++++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/flake.nix b/flake.nix index 0ee0ebd..827b47f 100644 --- a/flake.nix +++ b/flake.nix @@ -14,6 +14,12 @@ let specialArgs = { inherit nixpkgs bscpkgs agenix; theFlake = self; }; modules = [ "${self.outPath}/m/${name}/configuration.nix" ]; }; + # For now we only support x86 + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + overlays = [ self.overlays.default ]; + }; in { nixosConfigurations = { @@ -31,36 +37,22 @@ in weasel = mkConf "weasel"; }; - packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { - bscpkgs = bscpkgs.packages.x86_64-linux; - nixpkgs = nixpkgs.legacyPackages.x86_64-linux; - }; - }; + #packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { + # bscpkgs = bscpkgs.packages.x86_64-linux; + # nixpkgs = nixpkgs.legacyPackages.x86_64-linux; + #}; -# TODO: Merge from bscpkgs: -# -# inputs.nixpkgs.url = "nixpkgs"; -# -# outputs = { self, nixpkgs, ...}: -# let -# # For now we only support x86 -# system = "x86_64-linux"; -# pkgs = import nixpkgs { -# inherit system; 
-# overlays = [ self.overlays.default ]; -# }; -# in -# { -# bscOverlay = import ./overlay.nix; -# overlays.default = self.bscOverlay; -# # full nixpkgs with our overlay applied -# legacyPackages.${system} = pkgs; -# -# hydraJobs = { -# inherit (self.legacyPackages.${system}.bsc-ci) tests pkgs cross; -# }; -# -# # propagate nixpkgs lib, so we can do bscpkgs.lib -# inherit (nixpkgs) lib; -# }; + bscOverlay = import ./overlay.nix; + overlays.default = self.bscOverlay; + + # full nixpkgs with our overlay applied + legacyPackages.${system} = pkgs; + + hydraJobs = { + inherit (self.legacyPackages.${system}.bsc-ci) tests pkgs cross; + }; + + # propagate nixpkgs lib, so we can do bscpkgs.lib + inherit (nixpkgs) lib; + }; } -- 2.49.0 From 63822bb054301af6296906138cafb70126a753aa Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 11:57:17 +0200 Subject: [PATCH 466/472] Move the rest of packages to main overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- overlay.nix | 13 +++++++++++++ pkgs/overlay.nix | 18 ------------------ .../default.nix} | 0 3 files changed, 13 insertions(+), 18 deletions(-) delete mode 100644 pkgs/overlay.nix rename pkgs/{slurm-exporter.nix => slurm-exporter/default.nix} (100%) diff --git a/overlay.nix b/overlay.nix index 08f5190..6d2af14 100644 --- a/overlay.nix +++ b/overlay.nix @@ -7,6 +7,7 @@ let callPackage = final.callPackage; bscPkgs = { + amd-uprof = prev.callPackage ./pkgs/amd-uprof/default.nix { }; bench6 = callPackage ./pkgs/bench6/default.nix { }; bigotes = callPackage ./pkgs/bigotes/default.nix { }; clangOmpss2 = callPackage ./pkgs/llvm-ompss2/default.nix { }; @@ -14,12 +15,22 @@ let clangOmpss2Nodes = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmp; }; clangOmpss2NodesOmpv = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmpv; }; clangOmpss2Unwrapped = callPackage 
./pkgs/llvm-ompss2/clang.nix { }; + cudainfo = prev.callPackage ./pkgs/cudainfo/default.nix { }; #extrae = callPackage ./pkgs/extrae/default.nix { }; # Broken and outdated gpi-2 = callPackage ./pkgs/gpi-2/default.nix { }; intelPackages_2023 = callPackage ./pkgs/intel-oneapi/2023.nix { }; jemallocNanos6 = callPackage ./pkgs/nanos6/jemalloc.nix { }; + # FIXME: Extend this to all linuxPackages variants. Open problem, see: + # https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636 + linuxPackages = prev.linuxPackages.extend (_final: _prev: { + amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { }; + }); + linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: { + amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { }; + }); lmbench = callPackage ./pkgs/lmbench/default.nix { }; mcxx = callPackage ./pkgs/mcxx/default.nix { }; + meteocat-exporter = prev.callPackage ./pkgs/meteocat-exporter/default.nix { }; mpi = final.mpich; # Set MPICH as default mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; }; nanos6 = callPackage ./pkgs/nanos6/default.nix { }; @@ -36,6 +47,7 @@ let ovni = callPackage ./pkgs/ovni/default.nix { }; ovniGit = final.ovni.override { useGit = true; }; paraverKernel = callPackage ./pkgs/paraver/kernel.nix { }; + prometheus-slurm-exporter = prev.callPackage ./pkgs/slurm-exporter/default.nix { }; #pscom = callPackage ./pkgs/parastation/pscom.nix { }; # Unmaintaned #psmpi = callPackage ./pkgs/parastation/psmpi.nix { }; # Unmaintaned sonar = callPackage ./pkgs/sonar/default.nix { }; @@ -45,6 +57,7 @@ let stdenvClangOmpss2NodesOmpv = final.stdenv.override { cc = final.clangOmpss2NodesOmpv; allowedRequisites = null; }; tagaspi = callPackage ./pkgs/tagaspi/default.nix { }; tampi = callPackage ./pkgs/tampi/default.nix { }; + upc-qaire-exporter = prev.callPackage ./pkgs/upc-qaire-exporter/default.nix { }; wxparaver = callPackage ./pkgs/paraver/default.nix { }; }; 
diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix deleted file mode 100644 index 43ff0cd..0000000 --- a/pkgs/overlay.nix +++ /dev/null @@ -1,18 +0,0 @@ -final: prev: -{ - prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; - meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; - upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; - cudainfo = prev.callPackage ./cudainfo/default.nix { }; - - amd-uprof = prev.callPackage ./amd-uprof/default.nix { }; - - # FIXME: Extend this to all linuxPackages variants. Open problem, see: - # https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636 - linuxPackages = prev.linuxPackages.extend (_final: _prev: { - amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { }; - }); - linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: { - amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { }; - }); -} diff --git a/pkgs/slurm-exporter.nix b/pkgs/slurm-exporter/default.nix similarity index 100% rename from pkgs/slurm-exporter.nix rename to pkgs/slurm-exporter/default.nix -- 2.49.0 From b1a37ae1fe717b4ffbcc6b5d8dc5ba40884a59ce Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 12:32:04 +0200 Subject: [PATCH 467/472] Enable unfree packages in nixpkgs config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index 827b47f..3a6c85f 100644 --- a/flake.nix +++ b/flake.nix @@ -19,6 +19,7 @@ let pkgs = import nixpkgs { inherit system; overlays = [ self.overlays.default ]; + config.allowUnfree = true; }; in { -- 2.49.0 From 188ba6df0a79510c16e0935f13e4f773737fbe38 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 12:36:33 +0200 Subject: [PATCH 468/472] Remove bscpkgs input MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- flake.lock | 21 --------------------- flake.nix | 11 ++--------- m/common/base/nix.nix | 5 ++--- 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/flake.lock b/flake.lock index 30ed5ac..9b3e8be 100644 --- a/flake.lock +++ b/flake.lock @@ -23,26 +23,6 @@ "type": "github" } }, - "bscpkgs": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1749650500, - "narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=", - "ref": "refs/heads/master", - "rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405", - "revCount": 961, - "type": "git", - "url": "https://git.sr.ht/~rodarima/bscpkgs" - }, - "original": { - "type": "git", - "url": "https://git.sr.ht/~rodarima/bscpkgs" - } - }, "darwin": { "inputs": { "nixpkgs": [ @@ -105,7 +85,6 @@ "root": { "inputs": { "agenix": "agenix", - "bscpkgs": "bscpkgs", "nixpkgs": "nixpkgs" } }, diff --git a/flake.nix b/flake.nix index 3a6c85f..0bdaabe 100644 --- a/flake.nix +++ b/flake.nix @@ -3,15 +3,13 @@ nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; - bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; - bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; }; - outputs = { self, nixpkgs, agenix, bscpkgs, ... }: + outputs = { self, nixpkgs, agenix, ... 
}: let mkConf = name: nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - specialArgs = { inherit nixpkgs bscpkgs agenix; theFlake = self; }; + specialArgs = { inherit nixpkgs agenix; theFlake = self; }; modules = [ "${self.outPath}/m/${name}/configuration.nix" ]; }; # For now we only support x86 @@ -38,11 +36,6 @@ in weasel = mkConf "weasel"; }; - #packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { - # bscpkgs = bscpkgs.packages.x86_64-linux; - # nixpkgs = nixpkgs.legacyPackages.x86_64-linux; - #}; - bscOverlay = import ./overlay.nix; overlays.default = self.bscOverlay; diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix index 0e41b27..ed58f6e 100644 --- a/m/common/base/nix.nix +++ b/m/common/base/nix.nix @@ -1,9 +1,8 @@ -{ pkgs, nixpkgs, bscpkgs, theFlake, ... }: +{ pkgs, nixpkgs, theFlake, ... }: { nixpkgs.overlays = [ - bscpkgs.bscOverlay - (import ../../../pkgs/overlay.nix) + (import ../../../overlay.nix) ]; nixpkgs.config.allowUnfree = true; -- 2.49.0 From e8ac9dfb646d894f72d2f255812d89fecb28acd6 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 14:22:55 +0200 Subject: [PATCH 469/472] Upgrade README after bscpkgs merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index abbeb58..50cc8fd 100644 --- a/README.md +++ b/README.md @@ -1 +1,9 @@ -Nix overlay with BSC packages. +# Jungle + +This repository provides two components that can be used independently: + +- A Nix overlay with packages used at BSC (formerly known as bscpkgs). Access + them directly with `nix shell .#<package>`. + +- NixOS configurations for jungle machines. Use `nixos-rebuild switch --flake .` + to upgrade the current machine. 
-- 2.49.0 From ca48ce556c9754467f9cfe632a6096e446d07619 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 14:34:11 +0200 Subject: [PATCH 470/472] Update gitlab CI after merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b38911a..f7b0dd6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,4 +3,4 @@ build:bsc-ci.all: tags: - nix script: - - nix build -L "jungle#bsc-ci.all" --override-input bscpkgs . -v --show-trace + - nix build -L --no-link --print-out-paths .#bsc-ci.all -- 2.49.0 From 44cc60fcd8918da5c9c0b3d8c7c1c947a15ed41b Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 7 Oct 2025 14:38:50 +0200 Subject: [PATCH 471/472] Update license year range to 2025 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Aleix Boné --- COPYING | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COPYING b/COPYING index 7dacb7a..19df166 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2020-2021 Barcelona Supercomputing Center +Copyright (c) 2020-2025 Barcelona Supercomputing Center Copyright (c) 2003-2020 Eelco Dolstra and the Nixpkgs/NixOS contributors Permission is hereby granted, free of charge, to any person obtaining -- 2.49.0 From 43a1b25c2338a8c7a2cd44501e1158fc69accf31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Bon=C3=A9?= Date: Wed, 23 Jul 2025 10:43:33 +0200 Subject: [PATCH 472/472] Set strictDeps=true on our top level packages --- overlay.nix | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/overlay.nix b/overlay.nix index 6d2af14..df21edf 100644 --- a/overlay.nix +++ b/overlay.nix @@ -6,7 +6,11 @@ with final.lib; let callPackage = final.callPackage; - bscPkgs = { + mkStrict = drv: if (isDerivation drv && drv ? overrideAttrs && !(drv ? 
strictDeps)) + then drv.overrideAttrs { strictDeps = true; } + else drv; + + bscPkgs = mapAttrs (_: mkStrict) { amd-uprof = prev.callPackage ./pkgs/amd-uprof/default.nix { }; bench6 = callPackage ./pkgs/bench6/default.nix { }; bigotes = callPackage ./pkgs/bigotes/default.nix { }; -- 2.49.0