diff --git a/.gitignore b/.gitignore index c0199ee..e79c8c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -source -result **.swp +/result +/misc diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b38911a..f7b0dd6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,4 +3,4 @@ build:bsc-ci.all: tags: - nix script: - - nix build -L "jungle#bsc-ci.all" --override-input bscpkgs . -v --show-trace + - nix build -L --no-link --print-out-paths .#bsc-ci.all diff --git a/COPYING b/COPYING index 7dacb7a..19df166 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2020-2021 Barcelona Supercomputing Center +Copyright (c) 2020-2025 Barcelona Supercomputing Center Copyright (c) 2003-2020 Eelco Dolstra and the Nixpkgs/NixOS contributors Permission is hereby granted, free of charge, to any person obtaining diff --git a/README.md b/README.md index abbeb58..50cc8fd 100644 --- a/README.md +++ b/README.md @@ -1 +1,9 @@ -Nix overlay with BSC packages. +# Jungle + +This repository provides two components that can be used independently: + +- A Nix overlay with packages used at BSC (formerly known as bscpkgs). Access + them directly with `nix shell .#`. + +- NixOS configurations for jungle machines. Use `nixos-rebuild switch --flake .` + to upgrade the current machine. diff --git a/doc/install.md b/doc/install.md new file mode 100644 index 0000000..cee67c9 --- /dev/null +++ b/doc/install.md @@ -0,0 +1,176 @@ +# Installing NixOS in a new node + +This article shows the steps to install NixOS in a node following the +configuration of the repo. + +## Enable the serial console + +By default, the nodes have the serial console disabled in the GRUB and also boot +without the serial enabled. + +To enable the serial console in the GRUB, set in /etc/default/grub the following +lines: + +``` +GRUB_TERMINAL="console serial" +GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1" +``` + +To boot Linux with the serial enabled, so you can see the boot log and login via +serial set: + +``` +GRUB_CMDLINE_LINUX="console=ttyS0,115200n8 console=tty0" +``` + +Then update the grub config: + +``` +# grub2-mkconfig -o /boot/grub2/grub.cfg +``` + +And reboot. + +## Prepare the disk + +Create a main partition and label it `nixos` following [the manual][1]. + +[1]: https://nixos.org/manual/nixos/stable/index.html#sec-installation-manual-partitioning. + +``` +# disk=/dev/sdX +# parted $disk -- mklabel msdos +# parted $disk -- mkpart primary 1MB -8GB +# parted $disk -- mkpart primary linux-swap -8GB 100% +# parted $disk -- set 1 boot on +``` + +Then create an etx4 filesystem, labeled `nixos` where the system will be +installed. **Ensure that no other partition has the same label.** + +``` +# mkfs.ext4 -L nixos "${disk}1" +# mkswap -L swap "${disk}2" +# mount ${disk}1 /mnt +# lsblk -f $disk +NAME FSTYPE LABEL UUID MOUNTPOINT +sdX +`-sdX1 ext4 nixos 10d73b75-809c-4fa3-b99d-4fab2f0d0d8e /mnt +``` + +## Prepare nix and nixos-install + +Mount the nix store from the hut node in read-only /nix. 
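+(hut exports its store over NFS, so the Nix tools can be run on this node
+without installing anything locally):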
+ +``` +# mkdir /nix +# mount -o ro hut:/nix /nix +``` + +Get the nix binary and nixos-install tool from hut: + +``` +# ssh hut 'readlink -f $(which nix)' +/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin/nix +# ssh hut 'readlink -f $(which nixos-install)' +/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/nixos-install +``` + +And add them to the PATH: + +``` +# export PATH=$PATH:/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin +# export PATH=$PATH:/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/ +# nix --version +nix (Nix) 2.13.3 +``` + +## Adapt owl configuration + +Clone owl repo: + +``` +$ git clone git@bscpm03.bsc.es:rarias/owl.git +$ cd owl +``` + +Edit the configuration to your needs. + +## Install from another Linux OS + +Install nixOS into the storage drive. + +``` +# nixos-install --flake --root /mnt .#xeon0X +``` + +At this point, the nixOS grub has been installed into the nixos device, which +is not the default boot device. To keep both the old Linux and NixOS grubs, add +an entry into the old Linux grub to jump into the new grub. + +``` +# echo " + +menuentry 'NixOS' { + insmod chain + search --no-floppy --label nixos --set root + configfile /boot/grub/grub.cfg +} " >> /etc/grub.d/40_custom +``` + +Rebuild grub config. + +``` +# grub2-mkconfig -o /boot/grub/grub.cfg +``` + +To boot into NixOS manually, reboot and select NixOS in the grub menu to boot +into NixOS. + +To temporarily boot into NixOS only on the next reboot run: + +``` +# grub2-reboot 'NixOS' +``` + +To permanently boot into NixOS as the default boot OS, edit `/etc/default/grub/`: + +``` +GRUB_DEFAULT='NixOS' +``` + +And update grub. + +``` +# grub2-mkconfig -o /boot/grub/grub.cfg +``` + +## Build the nixos kexec image + +``` +# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v +``` + +## Chain NixOS in same disk with other systems + +To install NixOS on a partition along another system which controls the GRUB, +first disable the grub device, so the GRUB is not installed in the disk by +NixOS (only the /boot files will be generated): + +``` +boot.loader.grub.device = "nodev"; +``` + +Then add the following entry to the old GRUB configuration: + +``` +menuentry 'NixOS' { + insmod chain + search --no-floppy --label nixos --set root + configfile /boot/grub/grub.cfg +} +``` + +The partition with NixOS must have the label "nixos" for it to be found. New +system configuration entries will be stored in the GRUB configuration managed +by NixOS, so there is no need to change the old GRUB settings. diff --git a/doc/trim.sh b/doc/trim.sh new file mode 100755 index 0000000..4ae5368 --- /dev/null +++ b/doc/trim.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +# Trims the jungle repository by moving the website to its own repository and +# removing it from jungle. It also removes big pdf files and kernel +# configurations so the jungle repository is small. 
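+#
+# Assumes git-filter-repo is available in PATH and that the gitea@tent remote
+# is reachable over SSH. Run it from a directory that does not already contain
+# oldjungle/, newjungle/ or website/.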
+ +set -e + +if [ -e oldjungle -o -e newjungle -o -e website ]; then + echo "remove oldjungle/, newjungle/ and website/ first" + exit 1 +fi + +# Clone the old jungle repo +git clone gitea@tent:rarias/jungle.git oldjungle + +# First split the website into a new repository +mkdir website && git -C website init -b master +git-filter-repo \ + --path web \ + --subdirectory-filter web \ + --source oldjungle \ + --target website + +# Then remove the website, pdf files and big kernel configs +mkdir newjungle && git -C newjungle init -b master +git-filter-repo \ + --invert-paths \ + --path web \ + --path-glob 'doc*.pdf' \ + --path-glob '**/kernel/configs/lockdep' \ + --path-glob '**/kernel/configs/defconfig' \ + --source oldjungle \ + --target newjungle + +set -x + +du -sh oldjungle newjungle website +# 57M oldjungle +# 2,3M newjungle +# 6,4M website + +du -sh --exclude=.git oldjungle newjungle website +# 30M oldjungle +# 700K newjungle +# 3,5M website diff --git a/flake.lock b/flake.lock index 8fcc4a2..9b3e8be 100644 --- a/flake.lock +++ b/flake.lock @@ -1,22 +1,107 @@ { "nodes": { + "agenix": { + "inputs": { + "darwin": "darwin", + "home-manager": "home-manager", + "nixpkgs": [ + "nixpkgs" + ], + "systems": "systems" + }, + "locked": { + "lastModified": 1750173260, + "narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=", + "owner": "ryantm", + "repo": "agenix", + "rev": "531beac616433bac6f9e2a19feb8e99a22a66baf", + "type": "github" + }, + "original": { + "owner": "ryantm", + "repo": "agenix", + "type": "github" + } + }, + "darwin": { + "inputs": { + "nixpkgs": [ + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1744478979, + "narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=", + "owner": "lnl7", + "repo": "nix-darwin", + "rev": "43975d782b418ebf4969e9ccba82466728c2851b", + "type": "github" + }, + "original": { + "owner": "lnl7", + "ref": "master", + "repo": "nix-darwin", + "type": "github" + } + }, + "home-manager": { + "inputs": { + "nixpkgs": [ + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1745494811, + "narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=", + "owner": "nix-community", + "repo": "home-manager", + "rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "home-manager", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1752436162, "narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=", - "path": "/nix/store/zk8v61cpk1wprp9ld5ayc1g5fq4pdkwv-source", + "owner": "NixOS", + "repo": "nixpkgs", "rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8", - "type": "path" + "type": "github" }, "original": { - "id": "nixpkgs", - "type": "indirect" + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" } }, "root": { "inputs": { + "agenix": "agenix", "nixpkgs": "nixpkgs" } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 0cdb289..0bdaabe 100644 --- a/flake.nix +++ b/flake.nix @@ -1,26 +1,52 @@ { - inputs.nixpkgs.url = "nixpkgs"; + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + agenix.url = "github:ryantm/agenix"; + 
agenix.inputs.nixpkgs.follows = "nixpkgs"; + }; - outputs = { self, nixpkgs, ...}: - let - # For now we only support x86 - system = "x86_64-linux"; - pkgs = import nixpkgs { - inherit system; - overlays = [ self.overlays.default ]; - }; - in - { - bscOverlay = import ./overlay.nix; - overlays.default = self.bscOverlay; - # full nixpkgs with our overlay applied - legacyPackages.${system} = pkgs; - - hydraJobs = { - inherit (self.legacyPackages.${system}.bsc-ci) tests pkgs cross; - }; - - # propagate nixpkgs lib, so we can do bscpkgs.lib - inherit (nixpkgs) lib; + outputs = { self, nixpkgs, agenix, ... }: +let + mkConf = name: nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + specialArgs = { inherit nixpkgs agenix; theFlake = self; }; + modules = [ "${self.outPath}/m/${name}/configuration.nix" ]; + }; + # For now we only support x86 + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + overlays = [ self.overlays.default ]; + config.allowUnfree = true; + }; +in + { + nixosConfigurations = { + hut = mkConf "hut"; + tent = mkConf "tent"; + owl1 = mkConf "owl1"; + owl2 = mkConf "owl2"; + eudy = mkConf "eudy"; + koro = mkConf "koro"; + bay = mkConf "bay"; + lake2 = mkConf "lake2"; + raccoon = mkConf "raccoon"; + fox = mkConf "fox"; + apex = mkConf "apex"; + weasel = mkConf "weasel"; }; + + bscOverlay = import ./overlay.nix; + overlays.default = self.bscOverlay; + + # full nixpkgs with our overlay applied + legacyPackages.${system} = pkgs; + + hydraJobs = { + inherit (self.legacyPackages.${system}.bsc-ci) tests pkgs cross; + }; + + # propagate nixpkgs lib, so we can do bscpkgs.lib + inherit (nixpkgs) lib; + }; } diff --git a/keys.nix b/keys.nix new file mode 100644 index 0000000..d491d6d --- /dev/null +++ b/keys.nix @@ -0,0 +1,37 @@ +# As agenix needs to parse the secrets from a standalone .nix file, we describe +# here all the public keys +rec { + hosts = { + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; + owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; + owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; + eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; + koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; + bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; + lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; + tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent"; + apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex"; + weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel"; + raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon"; + }; + + hostGroup = with hosts; rec { + compute = [ owl1 owl2 fox raccoon ]; + playground = [ eudy koro weasel ]; + storage = [ bay lake2 ]; + monitor = [ hut ]; + login = [ apex ]; + + system = storage ++ monitor ++ login; + safe = system ++ compute; + all = safe ++ playground; + }; + + admins = { + "rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; + "rarias@tent" = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent"; + "rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox"; + root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; + }; +} diff --git a/m/apex/configuration.nix b/m/apex/configuration.nix new file mode 100644 index 0000000..6b1073b --- /dev/null +++ b/m/apex/configuration.nix @@ -0,0 +1,69 @@ +{ lib, config, pkgs, ... }: + +{ + imports = [ + ../common/xeon.nix + ../common/ssf/hosts.nix + ../module/ceph.nix + ../module/hut-substituter.nix + ../module/slurm-server.nix + ./nfs.nix + ./wireguard.nix + ]; + + # Don't install grub MBR for now + boot.loader.grub.device = "nodev"; + + boot.initrd.kernelModules = [ + "megaraid_sas" # For HW RAID + ]; + + environment.systemPackages = with pkgs; [ + storcli # To manage HW RAID + ]; + + fileSystems."/home" = { + device = "/dev/disk/by-label/home"; + fsType = "ext4"; + }; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + networking = { + hostName = "apex"; + defaultGateway = "84.88.53.233"; + nameservers = [ "8.8.8.8" ]; + + # Public facing interface + interfaces.eno1.ipv4.addresses = [ { + address = "84.88.53.236"; + prefixLength = 29; + } ]; + + # Internal LAN to our Ethernet switch + interfaces.eno2.ipv4.addresses = [ { + address = "10.0.40.30"; + prefixLength = 24; + } ]; + + # Infiniband over Omnipath switch (disconnected for now) + # interfaces.ibp5s0 = {}; + + nat = { + enable = true; + internalInterfaces = [ "eno2" ]; + externalInterface = "eno1"; + }; + }; + + networking.firewall = { + extraCommands = '' + # Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our + # logs. Insert as first position so we also protect SSH. + iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse + # Same with opsmonweb01.bsc.es which seems to be trying to access via SSH + iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse + ''; + }; +} diff --git a/m/apex/nfs.nix b/m/apex/nfs.nix new file mode 100644 index 0000000..8334d50 --- /dev/null +++ b/m/apex/nfs.nix @@ -0,0 +1,48 @@ +{ ... 
}: + +{ + services.nfs.server = { + enable = true; + lockdPort = 4001; + mountdPort = 4002; + statdPort = 4000; + exports = '' + /home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash) + /home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash) + ''; + }; + networking.firewall = { + # Check with `rpcinfo -p` + extraCommands = '' + # Accept NFS traffic from compute nodes but not from the outside + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept + # Same but UDP + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept + + # Accept NFS traffic from wg0 + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept + # Same but UDP + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept + iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept + ''; + }; +} diff --git a/m/apex/wireguard.nix b/m/apex/wireguard.nix new file mode 100644 index 0000000..4721d2d --- /dev/null +++ b/m/apex/wireguard.nix @@ -0,0 +1,42 @@ +{ config, ... }: + +{ + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgApex.file = ../../secrets/wg-apex.age; + + # Enable WireGuard + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + # "wg0" is the network interface name. You can name the interface arbitrarily. + wg0 = { + ips = [ "10.106.0.30/24" ]; + listenPort = 666; + privateKeyFile = config.age.secrets.wgApex.path; + # Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA= + peers = [ + { + name = "fox"; + publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; + allowedIPs = [ "10.106.0.1/32" ]; + endpoint = "fox.ac.upc.edu:666"; + # Send keepalives every 25 seconds. Important to keep NAT tables alive. 
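+          # Without them the NAT/firewall state along the path can expire and
+          # the tunnel goes silent until one side sends traffic again.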
+ persistentKeepalive = 25; + } + { + name = "raccoon"; + publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI="; + allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ]; + } + ]; + }; + }; + + networking.hosts = { + "10.106.0.1" = [ "fox" ]; + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; + }; +} diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix new file mode 100644 index 0000000..47e8264 --- /dev/null +++ b/m/bay/configuration.nix @@ -0,0 +1,108 @@ +{ config, pkgs, lib, ... }: + +{ + imports = [ + ../common/ssf.nix + ../module/hut-substituter.nix + ../module/monitoring.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; + + boot.kernel.sysctl = { + "kernel.yama.ptrace_scope" = lib.mkForce "1"; + }; + + environment.systemPackages = with pkgs; [ + ceph + ]; + + networking = { + hostName = "bay"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.40"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.40"; + prefixLength = 24; + } ]; + firewall = { + extraCommands = '' + # Accept all incoming TCP traffic from lake2 + iptables -A nixos-fw -p tcp -s lake2 -j nixos-fw-accept + # Accept monitoring requests from hut + iptables -A nixos-fw -p tcp -s hut -m multiport --dport 9283,9002 -j nixos-fw-accept + # Accept all Ceph traffic from the local network + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept + ''; + }; + }; + + services.ceph = { + enable = true; + global = { + fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b"; + monHost = "10.0.40.40"; + monInitialMembers = "bay"; + clusterNetwork = "10.0.40.40/24"; # Use Ethernet only + }; + extraConfig = { + # Only log to stderr so it appears in the journal + "log_file" = "/dev/null"; + "mon_cluster_log_file" = "/dev/null"; + "log_to_stderr" = "true"; + "err_to_stderr" = "true"; + "log_to_file" = "false"; + }; + mds = { + enable = true; + daemons = [ "mds0" "mds1" ]; + extraConfig = { + "host" = "bay"; + }; + }; + mgr = { + enable = true; + daemons = [ "bay" ]; + }; + mon = { + enable = true; + daemons = [ "bay" ]; + }; + osd = { + enable = true; + # One daemon per NVME disk + daemons = [ "0" "1" "2" "3" ]; + extraConfig = { + "osd crush chooseleaf type" = "0"; + "osd journal size" = "10000"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd pool default size" = "3"; + }; + }; + }; + + # Missing service for volumes, see: + # https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/ + systemd.services.ceph-volume = { + enable = true; + description = "Ceph Volume activation"; + unitConfig = { + Type = "oneshot"; + After = "local-fs.target"; + Wants = "local-fs.target"; + }; + path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ]; + serviceConfig = { + KillMode = "none"; + Environment = "CEPH_VOLUME_TIMEOUT=10000"; + ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'"; + TimeoutSec = "0"; + }; + wantedBy = [ "multi-user.target" ]; + }; +} diff --git a/m/common/base.nix b/m/common/base.nix new file mode 100644 index 0000000..cdd1dc1 --- /dev/null +++ b/m/common/base.nix @@ -0,0 +1,21 @@ +{ + # All machines should include this profile. + # Includes the basic configuration for an Intel server. 
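+  # Per-machine profiles such as xeon.nix and ssf.nix build on top of it.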
+ imports = [ + ./base/agenix.nix + ./base/always-power-on.nix + ./base/august-shutdown.nix + ./base/boot.nix + ./base/env.nix + ./base/fs.nix + ./base/hw.nix + ./base/net.nix + ./base/nix.nix + ./base/ntp.nix + ./base/rev.nix + ./base/ssh.nix + ./base/users.nix + ./base/watchdog.nix + ./base/zsh.nix + ]; +} diff --git a/m/common/base/agenix.nix b/m/common/base/agenix.nix new file mode 100644 index 0000000..3d5bb65 --- /dev/null +++ b/m/common/base/agenix.nix @@ -0,0 +1,9 @@ +{ agenix, ... }: + +{ + imports = [ agenix.nixosModules.default ]; + + environment.systemPackages = [ + agenix.packages.x86_64-linux.default + ]; +} diff --git a/m/common/base/always-power-on.nix b/m/common/base/always-power-on.nix new file mode 100644 index 0000000..cdee12c --- /dev/null +++ b/m/common/base/always-power-on.nix @@ -0,0 +1,8 @@ +{ + imports = [ + ../../module/power-policy.nix + ]; + + # Turn on as soon as we have power + power.policy = "always-on"; +} diff --git a/m/common/base/august-shutdown.nix b/m/common/base/august-shutdown.nix new file mode 100644 index 0000000..7f79c84 --- /dev/null +++ b/m/common/base/august-shutdown.nix @@ -0,0 +1,14 @@ +{ + # Shutdown all machines on August 3rd at 22:00, so we can protect the + # hardware from spurious electrical peaks on the yearly electrical cut for + # manteinance that starts on August 4th. + systemd.timers.august-shutdown = { + description = "Shutdown on August 3rd for maintenance"; + wantedBy = [ "timers.target" ]; + timerConfig = { + OnCalendar = "*-08-03 22:00:00"; + RandomizedDelaySec = "10min"; + Unit = "systemd-poweroff.service"; + }; + }; +} diff --git a/m/common/base/boot.nix b/m/common/base/boot.nix new file mode 100644 index 0000000..cfa4456 --- /dev/null +++ b/m/common/base/boot.nix @@ -0,0 +1,37 @@ +{ lib, pkgs, ... }: + +{ + # Use the GRUB 2 boot loader. + boot.loader.grub.enable = true; + + # Enable GRUB2 serial console + boot.loader.grub.extraConfig = '' + serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1 + terminal_input --append serial + terminal_output --append serial + ''; + + boot.kernel.sysctl = { + "kernel.perf_event_paranoid" = lib.mkDefault "-1"; + + # Allow ptracing (i.e. attach with GDB) any process of the same user, see: + # https://www.kernel.org/doc/Documentation/security/Yama.txt + "kernel.yama.ptrace_scope" = "0"; + }; + + boot.kernelPackages = pkgs.linuxPackages_latest; + + #boot.kernelPatches = lib.singleton { + # name = "osnoise-tracer"; + # patch = null; + # extraStructuredConfig = with lib.kernel; { + # OSNOISE_TRACER = yes; + # HWLAT_TRACER = yes; + # }; + #}; + + boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ]; + boot.initrd.kernelModules = [ ]; + boot.kernelModules = [ "kvm-intel" ]; + boot.extraModulePackages = [ ]; +} diff --git a/m/common/base/env.nix b/m/common/base/env.nix new file mode 100644 index 0000000..e553050 --- /dev/null +++ b/m/common/base/env.nix @@ -0,0 +1,37 @@ +{ pkgs, config, ... 
}: + +{ + environment.systemPackages = with pkgs; [ + vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option + nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree + ncdu config.boot.kernelPackages.perf ldns pv + # From bsckgs overlay + osumb + ]; + + programs.direnv.enable = true; + + # Increase limits + security.pam.loginLimits = [ + { + domain = "*"; + type = "-"; + item = "memlock"; + value = "1048576"; # 1 GiB of mem locked + } + ]; + + environment.enableAllTerminfo = true; + + environment.variables = { + EDITOR = "vim"; + VISUAL = "vim"; + }; + + programs.bash.promptInit = '' + PS1="\h\\$ " + ''; + + time.timeZone = "Europe/Madrid"; + i18n.defaultLocale = "en_DK.UTF-8"; +} diff --git a/m/common/base/fs.nix b/m/common/base/fs.nix new file mode 100644 index 0000000..0c785b9 --- /dev/null +++ b/m/common/base/fs.nix @@ -0,0 +1,24 @@ +{ ... }: + +{ + fileSystems."/" = + { device = "/dev/disk/by-label/nixos"; + fsType = "ext4"; + }; + + # Trim unused blocks weekly + services.fstrim.enable = true; + + swapDevices = + [ { device = "/dev/disk/by-label/swap"; } + ]; + + # Tracing + fileSystems."/sys/kernel/tracing" = { + device = "none"; + fsType = "tracefs"; + }; + + # Mount a tmpfs into /tmp + boot.tmp.useTmpfs = true; +} diff --git a/m/common/base/hw.nix b/m/common/base/hw.nix new file mode 100644 index 0000000..7e4112c --- /dev/null +++ b/m/common/base/hw.nix @@ -0,0 +1,14 @@ +# Do not modify this file! It was generated by ‘nixos-generate-config’ +# and may be overwritten by future invocations. Please make changes +# to /etc/nixos/configuration.nix instead. +{ config, lib, pkgs, modulesPath, ... }: + +{ + imports = + [ (modulesPath + "/installer/scan/not-detected.nix") + ]; + + nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; + powerManagement.cpuFreqGovernor = lib.mkDefault "powersave"; + hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; +} diff --git a/m/common/base/net.nix b/m/common/base/net.nix new file mode 100644 index 0000000..9fe6c4d --- /dev/null +++ b/m/common/base/net.nix @@ -0,0 +1,23 @@ +{ pkgs, lib, ... }: + +{ + networking = { + enableIPv6 = false; + useDHCP = false; + + firewall = { + enable = true; + allowedTCPPorts = [ 22 ]; + }; + + # Make sure we use iptables + nftables.enable = lib.mkForce false; + + hosts = { + "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; + "84.88.51.142" = [ "raccoon-ipmi" ]; + "192.168.11.12" = [ "bscpm04.bsc.es" ]; + "192.168.11.15" = [ "gitlab-internal.bsc.es" ]; + }; + }; +} diff --git a/m/common/base/nix.nix b/m/common/base/nix.nix new file mode 100644 index 0000000..ed58f6e --- /dev/null +++ b/m/common/base/nix.nix @@ -0,0 +1,59 @@ +{ pkgs, nixpkgs, theFlake, ... }: + +{ + nixpkgs.overlays = [ + (import ../../../overlay.nix) + ]; + + nixpkgs.config.allowUnfree = true; + + nix = { + nixPath = [ + "nixpkgs=${nixpkgs}" + "jungle=${theFlake.outPath}" + ]; + + registry = { + nixpkgs.flake = nixpkgs; + jungle.flake = theFlake; + }; + + settings = { + experimental-features = [ "nix-command" "flakes" ]; + sandbox = "relaxed"; + trusted-users = [ "@wheel" ]; + flake-registry = pkgs.writeText "global-registry.json" + ''{"flakes":[],"version":2}''; + keep-outputs = true; + }; + + gc = { + automatic = true; + dates = "weekly"; + options = "--delete-older-than 30d"; + }; + }; + + # The nix-gc.service can begin its execution *before* /home is mounted, + # causing it to remove all gcroots considering them as stale, as it cannot + # access the symlink. 
To prevent this problem, we force the service to wait + # until /home is mounted as well as other remote FS like /ceph. + systemd.services.nix-gc = { + # Start remote-fs.target if not already being started and fail if it fails + # to start. It will also be stopped if the remote-fs.target fails after + # starting successfully. + bindsTo = [ "remote-fs.target" ]; + # Wait until remote-fs.target fully starts before starting this one. + after = [ "remote-fs.target"]; + # Ensure we can access a remote path inside /home + unitConfig.ConditionPathExists = "/home/Computational"; + }; + + # This value determines the NixOS release from which the default + # settings for stateful data, like file locations and database versions + # on your system were taken. It‘s perfectly fine and recommended to leave + # this value at the release version of the first install of this system. + # Before changing this value read the documentation for this option + # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html). + system.stateVersion = "22.11"; # Did you read the comment? +} diff --git a/m/common/base/ntp.nix b/m/common/base/ntp.nix new file mode 100644 index 0000000..d4ddb25 --- /dev/null +++ b/m/common/base/ntp.nix @@ -0,0 +1,9 @@ +{ pkgs, ... }: + +{ + services.ntp.enable = true; + + # Use the NTP server at BSC, as we don't have direct access + # to the outside world + networking.timeServers = [ "84.88.52.36" ]; +} diff --git a/m/common/base/rev.nix b/m/common/base/rev.nix new file mode 100644 index 0000000..f2be747 --- /dev/null +++ b/m/common/base/rev.nix @@ -0,0 +1,21 @@ +{ theFlake, ... }: + +let + # Prevent building a configuration without revision + rev = if theFlake ? rev then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); +in { + # Save the commit of the config in /etc/configrev + environment.etc.configrev.text = rev + "\n"; + + # Keep a log with the config over time + system.activationScripts.configRevLog.text = '' + BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown) + CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown) + NEXT=${rev} + DATENOW=$(date --iso-8601=seconds) + echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log + ''; + + system.configurationRevision = rev; +} diff --git a/m/common/base/ssh.nix b/m/common/base/ssh.nix new file mode 100644 index 0000000..53de423 --- /dev/null +++ b/m/common/base/ssh.nix @@ -0,0 +1,18 @@ +{ lib, ... }: + +let + keys = import ../../../keys.nix; + hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts; +in +{ + # Enable the OpenSSH daemon. + services.openssh.enable = true; + + programs.ssh.knownHosts = hostsKeys // { + "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; + "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; + "bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT"; + "glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; + "glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; + }; +} diff --git a/m/common/base/users.nix b/m/common/base/users.nix new file mode 100644 index 0000000..6717fef --- /dev/null +++ b/m/common/base/users.nix @@ -0,0 +1,190 @@ +{ pkgs, ... 
}: + +{ + imports = [ + ../../module/jungle-users.nix + ]; + + users = { + mutableUsers = false; + users = { + # Generate hashedPassword with `mkpasswd -m sha-512` + + root.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut" + ]; + + rarias = { + uid = 1880; + isNormalUser = true; + linger = true; + home = "/home/Computational/rarias"; + description = "Rodrigo Arias"; + group = "Computational"; + extraGroups = [ "wheel" ]; + hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" + ]; + shell = pkgs.zsh; + }; + + arocanon = { + uid = 1042; + isNormalUser = true; + home = "/home/Computational/arocanon"; + description = "Aleix Roca"; + group = "Computational"; + extraGroups = [ "wheel" "tracing" ]; + hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" + ]; + }; + }; + + jungleUsers = { + rpenacob = { + uid = 2761; + isNormalUser = true; + home = "/home/Computational/rpenacob"; + description = "Raúl Peñacoba"; + group = "Computational"; + hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ]; + hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" + ]; + }; + + anavarro = { + uid = 1037; + isNormalUser = true; + home = "/home/Computational/anavarro"; + description = "Antoni Navarro"; + group = "Computational"; + hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ]; + hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni" + ]; + }; + + abonerib = { + uid = 4541; + isNormalUser = true; + home = "/home/Computational/abonerib"; + description = "Aleix Boné"; + group = "Computational"; + hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ]; + hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" + ]; + }; + + vlopez = { + uid = 4334; + isNormalUser = true; + home = "/home/Computational/vlopez"; + description = "Victor López"; + group = "Computational"; + hosts = [ "apex" "koro" ]; + hashedPassword = 
"$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" + ]; + }; + + dbautist = { + uid = 5649; + isNormalUser = true; + home = "/home/Computational/dbautist"; + description = "Dylan Bautista Cases"; + group = "Computational"; + hosts = [ "apex" "hut" "tent" "raccoon" ]; + hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" + ]; + }; + + dalvare1 = { + uid = 2758; + isNormalUser = true; + home = "/home/Computational/dalvare1"; + description = "David Álvarez"; + group = "Computational"; + hosts = [ "apex" "hut" "tent" "fox" ]; + hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" + ]; + }; + + varcila = { + uid = 5650; + isNormalUser = true; + home = "/home/Computational/varcila"; + description = "Vincent Arcila"; + group = "Computational"; + hosts = [ "apex" "hut" "tent" "fox" ]; + hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" + ]; + }; + + pmartin1 = { + # Arbitrary UID but large so it doesn't collide with other users on ssfhead. + uid = 9652; + isNormalUser = true; + home = "/home/Computational/pmartin1"; + description = "Pedro J. Martinez-Ferrer"; + group = "Computational"; + hosts = [ "fox" ]; + hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl."; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a" + ]; + }; + + csiringo = { + uid = 9653; + isNormalUser = true; + home = "/home/Computational/csiringo"; + description = "Cesare Siringo"; + group = "Computational"; + hosts = [ ]; + hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es" + ]; + }; + + acinca = { + uid = 9654; + isNormalUser = true; + home = "/home/Computational/acinca"; + description = "Arnau Cinca"; + group = "Computational"; + hosts = [ "apex" "hut" "fox" "owl1" "owl2" ]; + hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc" + ]; + }; + }; + + groups = { + Computational = { gid = 564; }; + tracing = { }; + }; + }; +} diff --git a/m/common/base/watchdog.nix b/m/common/base/watchdog.nix new file mode 100644 index 0000000..d4d297d --- /dev/null +++ b/m/common/base/watchdog.nix @@ -0,0 +1,9 @@ +{ ... 
}: + +{ + # The boards have a BMC watchdog controlled by IPMI + boot.kernelModules = [ "ipmi_watchdog" ]; + + # Enable systemd watchdog with 30 s interval + systemd.watchdog.runtimeTime = "30s"; +} diff --git a/m/common/base/zsh.nix b/m/common/base/zsh.nix new file mode 100644 index 0000000..5cfcb7f --- /dev/null +++ b/m/common/base/zsh.nix @@ -0,0 +1,91 @@ +{ pkgs, ... }: + +{ + environment.systemPackages = with pkgs; [ + zsh-completions + nix-zsh-completions + ]; + + programs.zsh = { + enable = true; + histSize = 1000000; + + shellInit = '' + # Disable new user prompt + if [ ! -e ~/.zshrc ]; then + touch ~/.zshrc + fi + ''; + + promptInit = '' + # Note that to manually override this in ~/.zshrc you should run `prompt off` + # before setting your PS1 and etc. Otherwise this will likely to interact with + # your ~/.zshrc configuration in unexpected ways as the default prompt sets + # a lot of different prompt variables. + autoload -U promptinit && promptinit && prompt default && setopt prompt_sp + ''; + + # Taken from Ulli Kehrle config: + # https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205 + interactiveShellInit = '' + source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh" + + # Save history immediately, but only load it when the shell starts + setopt inc_append_history + + # dircolors doesn't support alacritty: + # https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html + export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:'; + + # From Arch Linux and GRML + bindkey "^R" history-incremental-pattern-search-backward + bindkey "^S" history-incremental-pattern-search-forward + + # Auto rehash for new binaries + zstyle ':completion:*' rehash true + # show a nice menu with 
the matches + zstyle ':completion:*' menu yes select + + bindkey '^[OA' history-substring-search-up # Up + bindkey '^[[A' history-substring-search-up # Up + + bindkey '^[OB' history-substring-search-down # Down + bindkey '^[[B' history-substring-search-down # Down + + bindkey '\e[1~' beginning-of-line # Home + bindkey '\e[7~' beginning-of-line # Home + bindkey '\e[H' beginning-of-line # Home + bindkey '\eOH' beginning-of-line # Home + + bindkey '\e[4~' end-of-line # End + bindkey '\e[8~' end-of-line # End + bindkey '\e[F ' end-of-line # End + bindkey '\eOF' end-of-line # End + + bindkey '^?' backward-delete-char # Backspace + bindkey '\e[3~' delete-char # Del + # bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del + bindkey '\e[2~' overwrite-mode # Ins + + bindkey '^H' backward-kill-word # C-Backspace + + bindkey '5~' kill-word # C-Del + bindkey '^[[3;5~' kill-word # C-Del + bindkey '^[[3^' kill-word # C-Del + + bindkey "^[[1;5H" backward-kill-line # C-Home + bindkey "^[[7^" backward-kill-line # C-Home + + bindkey "^[[1;5F" kill-line # C-End + bindkey "^[[8^" kill-line # C-End + + bindkey '^[[1;5C' forward-word # C-Right + bindkey '^[0c' forward-word # C-Right + bindkey '^[[5C' forward-word # C-Right + + bindkey '^[[1;5D' backward-word # C-Left + bindkey '^[0d' backward-word # C-Left + bindkey '^[[5D' backward-word # C-Left + ''; + }; +} diff --git a/m/common/ssf.nix b/m/common/ssf.nix new file mode 100644 index 0000000..ef74da3 --- /dev/null +++ b/m/common/ssf.nix @@ -0,0 +1,10 @@ +{ + # Provides the base system for a xeon node in the SSF rack. + imports = [ + ./xeon.nix + ./ssf/fs.nix + ./ssf/hosts.nix + ./ssf/hosts-remote.nix + ./ssf/net.nix + ]; +} diff --git a/m/common/ssf/fs.nix b/m/common/ssf/fs.nix new file mode 100644 index 0000000..c50b3ff --- /dev/null +++ b/m/common/ssf/fs.nix @@ -0,0 +1,8 @@ +{ + # Mount the home via NFS + fileSystems."/home" = { + device = "10.0.40.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; +} diff --git a/m/common/ssf/hosts-remote.nix b/m/common/ssf/hosts-remote.nix new file mode 100644 index 0000000..1660f73 --- /dev/null +++ b/m/common/ssf/hosts-remote.nix @@ -0,0 +1,9 @@ +{ pkgs, ... }: + +{ + networking.hosts = { + # Remote hosts visible from compute nodes + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; + }; +} diff --git a/m/common/ssf/hosts.nix b/m/common/ssf/hosts.nix new file mode 100644 index 0000000..cd99eb9 --- /dev/null +++ b/m/common/ssf/hosts.nix @@ -0,0 +1,23 @@ +{ pkgs, ... 
}: + +{ + networking.hosts = { + # Login + "10.0.40.30" = [ "apex" ]; + + # Storage + "10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ]; + "10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ]; + "10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ]; + + # Xeon compute + "10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ]; + "10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ]; + "10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ]; + #"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ]; + "10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ]; + "10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ]; + "10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ]; + "10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ]; + }; +} diff --git a/m/common/ssf/net.nix b/m/common/ssf/net.nix new file mode 100644 index 0000000..911e180 --- /dev/null +++ b/m/common/ssf/net.nix @@ -0,0 +1,23 @@ +{ pkgs, ... }: + +{ + # Infiniband (IPoIB) + environment.systemPackages = [ pkgs.rdma-core ]; + boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; + + networking = { + defaultGateway = "10.0.40.30"; + nameservers = ["8.8.8.8"]; + + firewall = { + extraCommands = '' + # Prevent ssfhead from contacting our slurmd daemon + iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse + # But accept traffic to slurm ports from any other node in the subnet + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept + # We also need to open the srun port range + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept + ''; + }; + }; +} diff --git a/m/common/xeon.nix b/m/common/xeon.nix new file mode 100644 index 0000000..1394660 --- /dev/null +++ b/m/common/xeon.nix @@ -0,0 +1,7 @@ +{ + # Provides the base system for a xeon node, not necessarily in the SSF rack. + imports = [ + ./base.nix + ./xeon/console.nix + ]; +} diff --git a/m/common/xeon/console.nix b/m/common/xeon/console.nix new file mode 100644 index 0000000..e4c3644 --- /dev/null +++ b/m/common/xeon/console.nix @@ -0,0 +1,14 @@ +{ + # Restart the serial console + systemd.services."serial-getty@ttyS0" = { + enable = true; + wantedBy = [ "getty.target" ]; + serviceConfig.Restart = "always"; + }; + + # Enable serial console + boot.kernelParams = [ + "console=tty1" + "console=ttyS0,115200" + ]; +} diff --git a/m/eudy/configuration.nix b/m/eudy/configuration.nix new file mode 100644 index 0000000..ddb894e --- /dev/null +++ b/m/eudy/configuration.nix @@ -0,0 +1,38 @@ +{ config, pkgs, lib, modulesPath, ... 
}: + +{ + imports = [ + ../common/ssf.nix + #(modulesPath + "/installer/netboot/netboot-minimal.nix") + + ./kernel/kernel.nix + ./cpufreq.nix + ./fs.nix + ./users.nix + ../module/hut-substituter.nix + ../module/debuginfod.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53564b"; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; + + # members of the tracing group can use the lttng-provided kernel events + # without root permissions + users.groups.tracing.members = [ "arocanon" ]; + + # set up both ethernet and infiniband ips + networking = { + hostName = "eudy"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.8"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.8"; + prefixLength = 24; + } ]; + }; +} diff --git a/m/eudy/cpufreq.nix b/m/eudy/cpufreq.nix new file mode 100644 index 0000000..29498c4 --- /dev/null +++ b/m/eudy/cpufreq.nix @@ -0,0 +1,40 @@ +{ lib, ... }: + +{ + # Disable frequency boost by default. Use the intel_pstate driver instead of + # acpi_cpufreq driver because the acpi_cpufreq driver does not read the + # complete range of P-States [1]. Use the intel_pstate passive mode [2] to + # disable HWP, which allows a core to "select P-states by itself". Also, this + # disables intel governors, which confusingly, have the same names as the + # generic ones but behave differently [3]. + + # Essentially, we use the generic governors, but use the intel driver to read + # the P-state list. + + # [1] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#intel-pstate-vs-acpi-cpufreq + # [2] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#passive-mode + # [3] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#active-mode + # https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html + + # set intel_pstate to passive mode + boot.kernelParams = [ + "intel_pstate=passive" + ]; + # Disable frequency boost + system.activationScripts = { + disableFrequencyBoost.text = '' + echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo + ''; + }; + + ## disable intel_pstate + #boot.kernelParams = [ + # "intel_pstate=disable" + #]; + ## Disable frequency boost + #system.activationScripts = { + # disableFrequencyBoost.text = '' + # echo 0 > /sys/devices/system/cpu/cpufreq/boost + # ''; + #}; +} diff --git a/m/eudy/fs.nix b/m/eudy/fs.nix new file mode 100644 index 0000000..1c1526a --- /dev/null +++ b/m/eudy/fs.nix @@ -0,0 +1,13 @@ +{ ... }: + +{ + fileSystems."/nix" = { + device = "/dev/disk/by-label/optane"; + fsType = "ext4"; + neededForBoot = true; + }; + fileSystems."/mnt/data" = { + device = "/dev/disk/by-label/data"; + fsType = "ext4"; + }; +} diff --git a/m/eudy/kernel/kernel.nix b/m/eudy/kernel/kernel.nix new file mode 100644 index 0000000..98310ce --- /dev/null +++ b/m/eudy/kernel/kernel.nix @@ -0,0 +1,70 @@ +{ pkgs, lib, ... 
}: + +let + #fcs-devel = pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = /mnt/data/kernel/fcs/kernel/src; + # configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig; + #}; + + #fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false; + #fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false; + #fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true; + #fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true; + #fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = builtins.fetchGit { + # url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + # rev = gitCommit; + # ref = "fcs"; + # }; + # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; + #}; + + kernel = nixos-fcs; + + nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + version = "6.2.8"; + src = builtins.fetchGit { + url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + rev = gitCommit; + ref = branch; + }; + structuredExtraConfig = with lib.kernel; { + # add general custom kernel options here + } // lib.optionalAttrs lockStat { + LOCK_STAT = yes; + } // lib.optionalAttrs preempt { + PREEMPT = lib.mkForce yes; + PREEMPT_VOLUNTARY = lib.mkForce no; + }; + kernelPatches = []; + extraMeta.branch = lib.versions.majorMinor version; + })); + + nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; + nixos-fcs-lockstat = nixos-fcs.override { + lockStat = true; + }; + nixos-fcs-lockstat-preempt = nixos-fcs.override { + lockStat = true; + preempt = true; + }; + latest = pkgs.linuxPackages_latest; + +in { + imports = [ + ./lttng.nix + ./perf.nix + ]; + boot.kernelPackages = lib.mkForce kernel; + + # disable all cpu mitigations + boot.kernelParams = [ + "mitigations=off" + ]; + + # enable memory overcommit, needed to build a taglibc system using nix after + # increasing the openblas memory footprint + boot.kernel.sysctl."vm.overcommit_memory" = 1; +} diff --git a/m/eudy/kernel/lttng.nix b/m/eudy/kernel/lttng.nix new file mode 100644 index 0000000..eb45911 --- /dev/null +++ b/m/eudy/kernel/lttng.nix @@ -0,0 +1,43 @@ +{ config, pkgs, lib, ... }: + +let + + # The lttng btrfs probe crashes at compile time because of an undefined + # function. This disables the btrfs tracepoints to avoid the issue. + + # Also enable lockdep tracepoints, this is disabled by default because it + # does not work well on architectures other than x86_64 (i think that arm) as + # I was told on the mailing list. + lttng-modules-fixed = config.boot.kernelPackages.lttng-modules.overrideAttrs (finalAttrs: previousAttrs: { + patchPhase = (lib.optionalString (previousAttrs ? 
patchPhase) previousAttrs.patchPhase) + '' + # disable btrfs + substituteInPlace src/probes/Kbuild \ + --replace " obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" " #obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" + + # enable lockdep tracepoints + substituteInPlace src/probes/Kbuild \ + --replace "#ifneq (\$(CONFIG_LOCKDEP),)" "ifneq (\$(CONFIG_LOCKDEP),)" \ + --replace "# obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" " obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" \ + --replace "#endif # CONFIG_LOCKDEP" "endif # CONFIG_LOCKDEP" + ''; + }); +in { + + # add the lttng tools and modules to the system environment + boot.extraModulePackages = [ lttng-modules-fixed ]; + environment.systemPackages = with pkgs; [ + lttng-tools lttng-ust babeltrace + ]; + + # start the lttng root daemon to manage kernel events + systemd.services.lttng-sessiond = { + wantedBy = [ "multi-user.target" ]; + description = "LTTng session daemon for the root user"; + serviceConfig = { + User = "root"; + ExecStart = '' + ${pkgs.lttng-tools}/bin/lttng-sessiond + ''; + }; + }; +} diff --git a/m/eudy/kernel/perf.nix b/m/eudy/kernel/perf.nix new file mode 100644 index 0000000..51340df --- /dev/null +++ b/m/eudy/kernel/perf.nix @@ -0,0 +1,22 @@ +{ config, pkgs, lib, ... }: + +{ + # add the perf tool + environment.systemPackages = with pkgs; [ + config.boot.kernelPackages.perf + ]; + + # allow non-root users to read tracing data from the kernel + boot.kernel.sysctl."kernel.perf_event_paranoid" = -2; + boot.kernel.sysctl."kernel.kptr_restrict" = 0; + + # specify additionl options to the tracefs directory to allow members of the + # tracing group to access tracefs. + fileSystems."/sys/kernel/tracing" = { + options = [ + "mode=755" + "gid=tracing" + ]; + }; +} + diff --git a/m/eudy/users.nix b/m/eudy/users.nix new file mode 100644 index 0000000..a1cfab4 --- /dev/null +++ b/m/eudy/users.nix @@ -0,0 +1,11 @@ +{ ... }: + +{ + security.sudo.extraRules= [{ + users = [ "arocanon" ]; + commands = [{ + command = "ALL" ; + options= [ "NOPASSWD" ]; # "SETENV" # Adding the following could be a good idea + }]; + }]; +} diff --git a/m/fox/configuration.nix b/m/fox/configuration.nix new file mode 100644 index 0000000..8c381f8 --- /dev/null +++ b/m/fox/configuration.nix @@ -0,0 +1,112 @@ +{ lib, config, pkgs, ... }: + +{ + imports = [ + ../common/base.nix + ../common/xeon/console.nix + ../module/amd-uprof.nix + ../module/emulation.nix + ../module/nvidia.nix + ../module/slurm-client.nix + ../module/hut-substituter.nix + ./wireguard.nix + ]; + + # Don't turn off on August as UPC has different dates. + # Fox works fine on power cuts. 
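+  # This disables the timer defined in common/base/august-shutdown.nix.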
+ systemd.timers.august-shutdown.enable = false; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103"; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; + boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ]; + + hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; + hardware.cpu.intel.updateMicrocode = lib.mkForce false; + + # Use performance for benchmarks + powerManagement.cpuFreqGovernor = "performance"; + + services.amd-uprof.enable = true; + + # Disable NUMA balancing + boot.kernel.sysctl."kernel.numa_balancing" = 0; + + # Expose kernel addresses + boot.kernel.sysctl."kernel.kptr_restrict" = 0; + + # Disable NMI watchdog to save one hw counter (for AMD uProf) + boot.kernel.sysctl."kernel.nmi_watchdog" = 0; + + services.openssh.settings.X11Forwarding = true; + + services.fail2ban.enable = true; + + networking = { + timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ]; + hostName = "fox"; + # UPC network (may change over time, use DHCP) + # Public IP configuration: + # - Hostname: fox.ac.upc.edu + # - IP: 147.83.30.141 + # - Gateway: 147.83.30.130 + # - NetMask: 255.255.255.192 + # Private IP configuration for BMC: + # - Hostname: fox-ipmi.ac.upc.edu + # - IP: 147.83.35.27 + # - Gateway: 147.83.35.2 + # - NetMask: 255.255.255.0 + interfaces.enp1s0f0np0.useDHCP = true; + }; + + # Recommended for new graphics cards + hardware.nvidia.open = true; + + # Mount NVME disks + fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; + fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; }; + + # Mount the NFS home + fileSystems."/nfs/home" = { + device = "10.106.0.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; + + # Make a /nvme{0,1}/$USER directory for each user. + systemd.services.create-nvme-dirs = let + # Take only normal users in fox + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "local-fs.target" ]; + after = [ "local-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; + + # Only allow SSH connections from users who have a SLURM allocation + # See: https://slurm.schedmd.com/pam_slurm_adopt.html + security.pam.services.sshd.rules.account.slurm = { + control = "required"; + enable = true; + modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so"; + args = [ "log_level=debug5" ]; + order = 999999; # Make it last one + }; + + # Disable systemd session (pam_systemd.so) as it will conflict with the + # pam_slurm_adopt.so module. What happens is that the shell is first adopted + # into the slurmstepd task and then into the systemd session, which is not + # what we want, otherwise it will linger even if all jobs are gone. + security.pam.services.sshd.startSession = lib.mkForce false; +} diff --git a/m/fox/wireguard.nix b/m/fox/wireguard.nix new file mode 100644 index 0000000..f93c3e4 --- /dev/null +++ b/m/fox/wireguard.nix @@ -0,0 +1,54 @@ +{ config, ... 
}: + +{ + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgFox.file = ../../secrets/wg-fox.age; + + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + # "wg0" is the network interface name. You can name the interface arbitrarily. + wg0 = { + # Determines the IP address and subnet of the server's end of the tunnel interface. + ips = [ "10.106.0.1/24" ]; + + # The port that WireGuard listens to. Must be accessible by the client. + listenPort = 666; + + # Path to the private key file. + privateKeyFile = config.age.secrets.wgFox.path; + # Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y= + + peers = [ + # List of allowed peers. + { + name = "apex"; + publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; + # List of IPs assigned to this peer within the tunnel subnet. Used to configure routing. + allowedIPs = [ "10.106.0.30/32" "10.0.40.7/32" ]; + } + { + name = "raccoon"; + publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI="; + allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ]; + } + ]; + }; + }; + + networking.hosts = { + "10.106.0.30" = [ "apex" ]; + "10.0.40.7" = [ "hut" ]; + "10.106.0.236" = [ "raccoon" ]; + "10.0.44.4" = [ "tent" ]; + }; + + networking.firewall = { + extraCommands = '' + # Accept slurm connections to slurmd from apex (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept + ''; + }; +} diff --git a/m/hut/blackbox.yml b/m/hut/blackbox.yml new file mode 100644 index 0000000..a4c12d2 --- /dev/null +++ b/m/hut/blackbox.yml @@ -0,0 +1,14 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + follow_redirects: true + preferred_ip_protocol: "ip4" + valid_status_codes: [] # Defaults to 2xx + method: GET + icmp: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix new file mode 100644 index 0000000..9e8c1a2 --- /dev/null +++ b/m/hut/configuration.nix @@ -0,0 +1,67 @@ +{ config, pkgs, lib, ... 
}: + +{ + imports = [ + ../common/ssf.nix + + ../module/ceph.nix + ../module/debuginfod.nix + ../module/emulation.nix + ./gitlab-runner.nix + ./monitoring.nix + ./nfs.nix + ./nix-serve.nix + ./public-inbox.nix + ./gitea.nix + ./msmtp.nix + ./postgresql.nix + ./nginx.nix + ./p.nix + #./pxe.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f"; + + fileSystems = { + "/" = lib.mkForce { + device = "/dev/disk/by-label/nvme"; + fsType = "ext4"; + neededForBoot = true; + options = [ "noatime" ]; + }; + + "/boot" = lib.mkForce { + device = "/dev/disk/by-label/nixos-boot"; + fsType = "ext4"; + neededForBoot = true; + }; + }; + + networking = { + hostName = "hut"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.7"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.7"; + prefixLength = 24; + } ]; + firewall = { + extraCommands = '' + # Accept all proxy traffic from compute nodes but not the login + iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept + ''; + # Flush all rules and chains on stop so it won't break on start + extraStopCommands = '' + iptables -F + iptables -X + ''; + }; + }; + + # Allow proxy to bind to the ethernet interface + services.openssh.settings.GatewayPorts = "clientspecified"; +} diff --git a/m/hut/gitea.nix b/m/hut/gitea.nix new file mode 100644 index 0000000..02e0d50 --- /dev/null +++ b/m/hut/gitea.nix @@ -0,0 +1,63 @@ +{ config, lib, ... }: +{ + age.secrets.giteaRunnerToken.file = ../../secrets/gitea-runner-token.age; + + services.gitea = { + enable = true; + appName = "Gitea in the jungle"; + + settings = { + server = { + ROOT_URL = "https://jungle.bsc.es/git/"; + LOCAL_ROOT_URL = "https://jungle.bsc.es/git/"; + LANDING_PAGE = "explore"; + }; + metrics.ENABLED = true; + service = { + REGISTER_MANUAL_CONFIRM = true; + ENABLE_NOTIFY_MAIL = true; + }; + log.LEVEL = "Warn"; + + mailer = { + ENABLED = true; + FROM = "jungle-robot@bsc.es"; + PROTOCOL = "sendmail"; + SENDMAIL_PATH = "/run/wrappers/bin/sendmail"; + SENDMAIL_ARGS = "--"; + }; + }; + }; + + services.gitea-actions-runner.instances = { + runrun = { + enable = true; + name = "runrun"; + url = "https://jungle.bsc.es/git/"; + tokenFile = config.age.secrets.giteaRunnerToken.path; + labels = [ "native:host" ]; + settings.runner.capacity = 8; + }; + }; + + systemd.services.gitea-runner-runrun = { + path = [ "/run/current-system/sw" ]; + serviceConfig = { + # DynamicUser doesn't work well with SSH + DynamicUser = lib.mkForce false; + User = "gitea-runner"; + Group = "gitea-runner"; + }; + }; + + users.users.gitea-runner = { + isSystemUser = true; + home = "/var/lib/gitea-runner"; + description = "Gitea Runner"; + group = "gitea-runner"; + extraGroups = [ "docker" ]; + createHome = true; + }; + users.groups.gitea-runner = {}; +} + diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix new file mode 100644 index 0000000..2fe7c1c --- /dev/null +++ b/m/hut/gitlab-runner.nix @@ -0,0 +1,126 @@ +{ pkgs, lib, config, ... 
}: + +{ + age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age; + age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age; + age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age; + + services.gitlab-runner = { + enable = true; + settings.concurrent = 5; + services = let + common-shell = { + executor = "shell"; + environmentVariables = { + SHELL = "${pkgs.bash}/bin/bash"; + }; + }; + common-docker = { + executor = "docker"; + dockerImage = "debian:stable"; + registrationFlags = [ + "--docker-network-mode host" + ]; + environmentVariables = { + https_proxy = "http://hut:23080"; + http_proxy = "http://hut:23080"; + }; + }; + in { + # For pm.bsc.es/gitlab + gitlab-pm-shell = common-shell // { + authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path; + }; + gitlab-pm-docker = common-docker // { + authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path; + }; + + gitlab-bsc-docker = { + # gitlab.bsc.es still uses the old token mechanism + registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path; + tagList = [ "docker" "hut" ]; + environmentVariables = { + # We cannot access the hut local interface from docker, so we connect + # to hut directly via the ethernet one. + https_proxy = "http://hut:23080"; + http_proxy = "http://hut:23080"; + }; + executor = "docker"; + dockerImage = "alpine"; + dockerVolumes = [ + "/nix/store:/nix/store:ro" + "/nix/var/nix/db:/nix/var/nix/db:ro" + "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" + ]; + dockerExtraHosts = [ + # Required to pass the proxy via hut + "hut:10.0.40.7" + ]; + dockerDisableCache = true; + registrationFlags = [ + # Increase build log length to 64 MiB + "--output-limit 65536" + ]; + preBuildScript = pkgs.writeScript "setup-container" '' + mkdir -p -m 0755 /nix/var/log/nix/drvs + mkdir -p -m 0755 /nix/var/nix/gcroots + mkdir -p -m 0755 /nix/var/nix/profiles + mkdir -p -m 0755 /nix/var/nix/temproots + mkdir -p -m 0755 /nix/var/nix/userpool + mkdir -p -m 1777 /nix/var/nix/gcroots/per-user + mkdir -p -m 1777 /nix/var/nix/profiles/per-user + mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root + mkdir -p -m 0700 "$HOME/.nix-defexpr" + mkdir -p -m 0700 "$HOME/.ssh" + cat > "$HOME/.ssh/config" << EOF + Host bscpm04.bsc.es gitlab-internal.bsc.es + User git + ProxyCommand nc -X connect -x hut:23080 %h %p + Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es + ProxyCommand nc -X connect -x hut:23080 %h %p + EOF + cat >> "$HOME/.ssh/known_hosts" << EOF + bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT + gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 + EOF + . ${pkgs.nix}/etc/profile.d/nix-daemon.sh + # Required to load SSL certificate paths + . 
${pkgs.cacert}/nix-support/setup-hook + ''; + environmentVariables = { + ENV = "/etc/profile"; + USER = "root"; + NIX_REMOTE = "daemon"; + PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; + }; + }; + }; + }; + + # DOCKER* chains are useless, override at FORWARD and nixos-fw + networking.firewall.extraCommands = '' + # Don't forward any traffic from docker + iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse + + # Allow incoming traffic from docker to 23080 + iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT + ''; + + #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; + systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; + systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; + systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner"; + systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce + ''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; + + users.users.gitlab-runner = { + uid = config.ids.uids.gitlab-runner; + #isNormalUser = true; + home = "/var/lib/gitlab-runner"; + description = "Gitlab Runner"; + group = "gitlab-runner"; + extraGroups = [ "docker" ]; + createHome = true; + }; + users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; +} diff --git a/m/hut/gpfs-probe.nix b/m/hut/gpfs-probe.nix new file mode 100644 index 0000000..d4a0d98 --- /dev/null +++ b/m/hut/gpfs-probe.nix @@ -0,0 +1,31 @@ +{ pkgs, config, lib, ... }: +let + gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { } + '' + cp ${./gpfs-probe.sh} $out; + chmod +x $out + '' + ; +in +{ + # Use a new user to handle the SSH keys + users.groups.ssh-robot = { }; + users.users.ssh-robot = { + description = "SSH Robot"; + isNormalUser = true; + home = "/var/lib/ssh-robot"; + }; + + systemd.services.gpfs-probe = { + description = "Daemon to report GPFS latency via SSH"; + path = [ pkgs.openssh pkgs.netcat ]; + after = [ "network.target" ]; + wantedBy = [ "default.target" ]; + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}"; + User = "ssh-robot"; + Group = "ssh-robot"; + }; + }; +} diff --git a/m/hut/gpfs-probe.sh b/m/hut/gpfs-probe.sh new file mode 100755 index 0000000..b8f7f82 --- /dev/null +++ b/m/hut/gpfs-probe.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +N=500 + +t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") + +if [ -z "$t" ]; then + t="5.00" +fi + +cat <" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; + } + ]; + } + { + job_name = "blackbox-icmp"; + metrics_path = "/probe"; + params = { module = [ "icmp" ]; }; + static_configs = [{ + targets = [ + "1.1.1.1" + "8.8.8.8" + "ssfhead" + "anella-bsc.cesca.cat" + "upc-anella.cesca.cat" + "fox.ac.upc.edu" + "arenys5.ac.upc.edu" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ 
"__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; + } + ]; + } + { + job_name = "gitea"; + static_configs = [{ targets = [ "127.0.0.1:3000" ]; }]; + } + { + # Scrape the IPMI info of the hosts remotely via LAN + job_name = "ipmi-lan"; + scrape_interval = "1m"; + scrape_timeout = "30s"; + metrics_path = "/ipmi"; + scheme = "http"; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + separator = ";"; + regex = "(.*)(:80)?"; + target_label = "__param_target"; + replacement = "\${1}"; + action = "replace"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + separator = ";"; + regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end + target_label = "instance"; + replacement = "\${1}"; + action = "replace"; + } + { + # Sets the fixed "module=lan" URL param + separator = ";"; + regex = "(.*)"; + target_label = "__param_module"; + replacement = "lan"; + action = "replace"; + } + { + # Sets the target to query as the localhost IPMI exporter + separator = ";"; + regex = ".*"; + target_label = "__address__"; + replacement = "127.0.0.1:9290"; + action = "replace"; + } + ]; + + # Load the list of targets from another file + file_sd_configs = [ + { + files = [ "${./targets.yml}" ]; + refresh_interval = "30s"; + } + ]; + } + { + job_name = "ipmi-raccoon"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9291" ]; } + ]; + params = { + target = [ "84.88.51.142" ]; + module = [ "raccoon" ]; + }; + } + { + job_name = "raccoon"; + static_configs = [ + { + targets = [ "127.0.0.1:19002" ]; # Node exporter + } + ]; + } + ]; + }; +} diff --git a/m/hut/msmtp.nix b/m/hut/msmtp.nix new file mode 100644 index 0000000..aaeaf5d --- /dev/null +++ b/m/hut/msmtp.nix @@ -0,0 +1,24 @@ +{ config, lib, ... }: +{ + age.secrets.jungleRobotPassword = { + file = ../../secrets/jungle-robot-password.age; + group = "gitea"; + mode = "440"; + }; + + programs.msmtp = { + enable = true; + accounts = { + default = { + auth = true; + tls = true; + tls_starttls = false; + port = 465; + host = "mail.bsc.es"; + user = "jungle-robot"; + passwordeval = "cat ${config.age.secrets.jungleRobotPassword.path}"; + from = "jungle-robot@bsc.es"; + }; + }; + }; +} diff --git a/m/hut/nfs.nix b/m/hut/nfs.nix new file mode 100644 index 0000000..affb304 --- /dev/null +++ b/m/hut/nfs.nix @@ -0,0 +1,9 @@ +{ ... }: + +{ + services.nfs.server.enable = true; + services.nfs.server.exports = '' + /nix 10.0.40.0/24(ro,sync,no_subtree_check,root_squash) + ''; + networking.firewall.allowedTCPPorts = [ 2049 ]; +} diff --git a/m/hut/nginx.nix b/m/hut/nginx.nix new file mode 100644 index 0000000..f38d587 --- /dev/null +++ b/m/hut/nginx.nix @@ -0,0 +1,76 @@ +{ theFlake, pkgs, ... 
}: +let + website = pkgs.stdenv.mkDerivation { + name = "jungle-web"; + src = pkgs.fetchgit { + url = "https://jungle.bsc.es/git/rarias/jungle-website.git"; + rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1"; + hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4="; + }; + buildInputs = [ pkgs.hugo ]; + buildPhase = '' + rm -rf public/ + hugo + ''; + installPhase = '' + cp -r public $out + ''; + # Don't mess doc/ + dontFixup = true; + }; +in +{ + networking.firewall.allowedTCPPorts = [ 80 ]; + services.nginx = { + enable = true; + virtualHosts."jungle.bsc.es" = { + root = "${website}"; + listen = [ + { + addr = "0.0.0.0"; + port = 80; + } + ]; + extraConfig = '' + set_real_ip_from 127.0.0.1; + set_real_ip_from 84.88.52.107; + real_ip_recursive on; + real_ip_header X-Forwarded-For; + + location /git { + rewrite ^/git$ / break; + rewrite ^/git/(.*) /$1 break; + proxy_pass http://127.0.0.1:3000; + proxy_redirect http:// $scheme://; + } + location /cache { + rewrite ^/cache/(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_redirect http:// $scheme://; + } + location /lists { + proxy_pass http://127.0.0.1:8081; + proxy_redirect http:// $scheme://; + } + location /grafana { + proxy_pass http://127.0.0.1:2342; + proxy_redirect http:// $scheme://; + proxy_set_header Host $host; + # Websockets + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + location ~ ^/~(.+?)(/.*)?$ { + alias /ceph/home/$1/public_html$2; + index index.html index.htm; + autoindex on; + absolute_redirect off; + } + location /p/ { + alias /ceph/p/; + } + ''; + }; + }; +} diff --git a/m/hut/nix-serve.nix b/m/hut/nix-serve.nix new file mode 100644 index 0000000..35ccd72 --- /dev/null +++ b/m/hut/nix-serve.nix @@ -0,0 +1,16 @@ +{ config, ... }: + +{ + age.secrets.nixServe.file = ../../secrets/nix-serve.age; + + services.nix-serve = { + enable = true; + # Only listen locally, as we serve it via ssh + bindAddress = "127.0.0.1"; + port = 5000; + + secretKeyFile = config.age.secrets.nixServe.path; + # Public key: + # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0= + }; +} diff --git a/m/hut/p.nix b/m/hut/p.nix new file mode 100644 index 0000000..30bfc0b --- /dev/null +++ b/m/hut/p.nix @@ -0,0 +1,43 @@ +{ pkgs, lib, config, ... }: +let + p = pkgs.writeShellScriptBin "p" '' + set -e + cd /ceph + pastedir="p/$USER" + mkdir -p "$pastedir" + + ext="txt" + + if [ -n "$1" ]; then + ext="$1" + fi + + out=$(mktemp "$pastedir/XXXXXXXX.$ext") + + cat > "$out" + chmod go+r "$out" + echo "https://jungle.bsc.es/$out" + ''; +in +{ + environment.systemPackages = with pkgs; [ p ]; + + # Make sure we have a directory per user. We cannot use the nice + # systemd-tmpfiles-setup.service service because this is a remote FS, and it + # may not be mounted when it runs. 
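+  #
+  # Usage sketch (hypothetical session; the link format follows from the script
+  # above and from nginx.nix serving /p/ out of /ceph/p):
+  #   $ echo hello | p         # prints https://jungle.bsc.es/p/$USER/<random>.txt
+  #   $ p svg < plot.svg       # keep the extension so the browser renders it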
+ systemd.services.create-paste-dirs = let + # Take only normal users in hut + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "remote-fs.target" ]; + after = [ "remote-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; +} diff --git a/m/hut/postgresql.nix b/m/hut/postgresql.nix new file mode 100644 index 0000000..fc86d7a --- /dev/null +++ b/m/hut/postgresql.nix @@ -0,0 +1,19 @@ +{ lib, ... }: + +{ + services.postgresql = { + enable = true; + ensureDatabases = [ "perftestsdb" ]; + ensureUsers = [ + { name = "anavarro"; ensureClauses.superuser = true; } + { name = "rarias"; ensureClauses.superuser = true; } + { name = "grafana"; } + ]; + authentication = '' + #type database DBuser auth-method + local perftestsdb rarias trust + local perftestsdb anavarro trust + local perftestsdb grafana trust + ''; + }; +} diff --git a/m/hut/public-inbox.css b/m/hut/public-inbox.css new file mode 100644 index 0000000..9d0367c --- /dev/null +++ b/m/hut/public-inbox.css @@ -0,0 +1,79 @@ +/* + * CC0-1.0 + * Dark color scheme using 216 web-safe colors, inspired + * somewhat by the default color scheme in mutt. + * It reduces eyestrain for me, and energy usage for all: + * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme + */ + +* { + font-size: 14px; + font-family: monospace; +} + +pre { + white-space: pre-wrap; + padding: 10px; + background: #f5f5f5; +} + +hr { + margin: 30px 0; +} + +body { + max-width: 120ex; /* 120 columns wide */ + margin: 50px auto; +} + +/* + * Underlined links add visual noise which make them hard-to-read. + * Use colors to make them stand out, instead. + */ +a:link { + color: #007; + text-decoration: none; +} +a:visited { + color:#504; +} +a:hover { + text-decoration: underline; +} + +/* quoted text in emails gets a different color */ +*.q { color:gray } + +/* + * these may be used with cgit , too. + * (cgit uses
, public-inbox uses ) + */ +*.add { color:darkgreen } /* diff post-image lines */ +*.del { color:darkred } /* diff pre-image lines */ +*.head { color:black } /* diff header (metainformation) */ +*.hunk { color:gray } /* diff hunk-header */ + +/* + * highlight 3.x colors (tested 3.18) for displaying blobs. + * This doesn't use most of the colors available, as I find too + * many colors overwhelming, so the default is commented out. + */ +.hl.num { color:#f30 } /* number */ +.hl.esc { color:#f0f } /* escape character */ +.hl.str { color:#f30 } /* string */ +.hl.ppc { color:#f0f } /* preprocessor */ +.hl.pps { color:#f30 } /* preprocessor string */ +.hl.slc { color:#09f } /* single-line comment */ +.hl.com { color:#09f } /* multi-line comment */ +/* .hl.opt { color:#ccc } */ /* operator */ +/* .hl.ipl { color:#ccc } */ /* interpolation */ + +/* keyword groups kw[a-z] */ +.hl.kwa { color:#ff0 } +.hl.kwb { color:#0f0 } +.hl.kwc { color:#ff0 } +/* .hl.kwd { color:#ccc } */ + +/* line-number (unused by public-inbox) */ +/* .hl.lin { color:#ccc } */ + diff --git a/m/hut/public-inbox.nix b/m/hut/public-inbox.nix new file mode 100644 index 0000000..a3d7a67 --- /dev/null +++ b/m/hut/public-inbox.nix @@ -0,0 +1,47 @@ +{ lib, ... }: + +{ + services.public-inbox = { + enable = true; + http = { + enable = true; + port = 8081; + mounts = [ "/lists" ]; + }; + settings.publicinbox = { + css = [ "${./public-inbox.css}" ]; + wwwlisting = "all"; + }; + inboxes = { + bscpkgs = { + url = "https://jungle.bsc.es/lists/bscpkgs"; + address = [ "~rodarima/bscpkgs@lists.sr.ht" ]; + watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ]; + description = "Patches for bscpkgs"; + listid = "~rodarima/bscpkgs.lists.sr.ht"; + }; + jungle = { + url = "https://jungle.bsc.es/lists/jungle"; + address = [ "~rodarima/jungle@lists.sr.ht" ]; + watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ]; + description = "Patches for jungle"; + listid = "~rodarima/jungle.lists.sr.ht"; + }; + }; + }; + + # We need access to the network for the watch service, as we will fetch the + # emails directly from the IMAP server. + systemd.services.public-inbox-watch.serviceConfig = { + PrivateNetwork = lib.mkForce false; + RestrictAddressFamilies = lib.mkForce [ "AF_UNIX" "AF_INET" "AF_INET6" ]; + KillSignal = "SIGKILL"; # Avoid slow shutdown + + # Required for chmod(..., 02750) on directories by git, from + # systemd.exec(8): + # > Note that this restricts marking of any type of file system object with + # > these bits, including both regular files and directories (where the SGID + # > is a different meaning than for files, see documentation). + RestrictSUIDSGID = lib.mkForce false; + }; +} diff --git a/m/hut/pxe.nix b/m/hut/pxe.nix new file mode 100644 index 0000000..e3a74e2 --- /dev/null +++ b/m/hut/pxe.nix @@ -0,0 +1,35 @@ +{ theFlake, pkgs, ... }: + +# This module describes a script that can launch the pixiecore daemon to serve a +# NixOS image via PXE to a node to directly boot from there, without requiring a +# working disk. 
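+#
+# Rough usage sketch (assuming the target node is configured to PXE boot on the
+# same network segment and some DHCP server is answering, e.g. the commented
+# dnsmasq block below):
+#
+#   $ pixiecore-helper
+#
+# Extra pixiecore flags can be appended and are passed through via "$@".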
+ +let + # The host config must have the netboot-minimal.nix module too + host = theFlake.nixosConfigurations.lake2; + sys = host.config.system; + build = sys.build; + kernel = "${build.kernel}/bzImage"; + initrd = "${build.netbootRamdisk}/initrd"; + init = "${build.toplevel}/init"; + + script = pkgs.writeShellScriptBin "pixiecore-helper" '' + #!/usr/bin/env bash -x + + ${pkgs.pixiecore}/bin/pixiecore \ + boot ${kernel} ${initrd} --cmdline "init=${init} loglevel=4" \ + --debug --dhcp-no-bind --port 64172 --status-port 64172 "$@" + ''; +in +{ + ## We need a DHCP server to provide the IP + #services.dnsmasq = { + # enable = true; + # settings = { + # domain-needed = true; + # dhcp-range = [ "192.168.0.2,192.168.0.254" ]; + # }; + #}; + + environment.systemPackages = [ script ]; +} diff --git a/m/hut/targets.yml b/m/hut/targets.yml new file mode 100644 index 0000000..fc4c72d --- /dev/null +++ b/m/hut/targets.yml @@ -0,0 +1,15 @@ +- targets: + - owl1-ipmi + - owl2-ipmi + - xeon03-ipmi + - xeon04-ipmi + - koro-ipmi + - weasel-ipmi + - hut-ipmi + - eudy-ipmi + # Storage + - bay-ipmi + - oss01-ipmi + - lake2-ipmi + labels: + job: ipmi-lan diff --git a/m/koro/configuration.nix b/m/koro/configuration.nix new file mode 100644 index 0000000..a106b62 --- /dev/null +++ b/m/koro/configuration.nix @@ -0,0 +1,35 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/ssf.nix + #(modulesPath + "/installer/netboot/netboot-minimal.nix") + + ../eudy/cpufreq.nix + ../eudy/users.nix + ./kernel.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5376d2"; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; + + # members of the tracing group can use the lttng-provided kernel events + # without root permissions + users.groups.tracing.members = [ "arocanon" "vlopez" ]; + + # set up both ethernet and infiniband ips + networking = { + hostName = "koro"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.5"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.5"; + prefixLength = 24; + } ]; + }; +} diff --git a/m/koro/kernel.nix b/m/koro/kernel.nix new file mode 100644 index 0000000..7ddf4a8 --- /dev/null +++ b/m/koro/kernel.nix @@ -0,0 +1,70 @@ +{ pkgs, lib, ... }: + +let + #fcs-devel = pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = /mnt/data/kernel/fcs/kernel/src; + # configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig; + #}; + + #fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false; + #fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false; + #fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true; + #fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true; + #fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom { + # version = "6.2.8"; + # src = builtins.fetchGit { + # url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + # rev = gitCommit; + # ref = "fcs"; + # }; + # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; + #}; + + kernel = nixos-fcs; + + nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? 
"fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { + version = "6.2.8"; + src = builtins.fetchGit { + url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; + rev = gitCommit; + ref = branch; + }; + structuredExtraConfig = with lib.kernel; { + # add general custom kernel options here + } // lib.optionalAttrs lockStat { + LOCK_STAT = yes; + } // lib.optionalAttrs preempt { + PREEMPT = lib.mkForce yes; + PREEMPT_VOLUNTARY = lib.mkForce no; + }; + kernelPatches = []; + extraMeta.branch = lib.versions.majorMinor version; + })); + + nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; + nixos-fcs-lockstat = nixos-fcs.override { + lockStat = true; + }; + nixos-fcs-lockstat-preempt = nixos-fcs.override { + lockStat = true; + preempt = true; + }; + latest = pkgs.linuxPackages_latest; + +in { + imports = [ + ../eudy/kernel/lttng.nix + ../eudy/kernel/perf.nix + ]; + boot.kernelPackages = lib.mkForce kernel; + + # disable all cpu mitigations + boot.kernelParams = [ + "mitigations=off" + ]; + + # enable memory overcommit, needed to build a taglibc system using nix after + # increasing the openblas memory footprint + boot.kernel.sysctl."vm.overcommit_memory" = 1; +} diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix new file mode 100644 index 0000000..a67e5ae --- /dev/null +++ b/m/lake2/configuration.nix @@ -0,0 +1,84 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/ssf.nix + ../module/monitoring.nix + ../module/hut-substituter.nix + ]; + + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; + + boot.kernel.sysctl = { + "kernel.yama.ptrace_scope" = lib.mkForce "1"; + }; + + environment.systemPackages = with pkgs; [ + ceph + ]; + + services.ceph = { + enable = true; + global = { + fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b"; + monHost = "10.0.40.40"; + monInitialMembers = "bay"; + clusterNetwork = "10.0.40.40/24"; # Use Ethernet only + }; + osd = { + enable = true; + # One daemon per NVME disk + daemons = [ "4" "5" "6" "7" ]; + extraConfig = { + "osd crush chooseleaf type" = "0"; + "osd journal size" = "10000"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd pool default size" = "3"; + }; + }; + }; + + networking = { + hostName = "lake2"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.42"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.42"; + prefixLength = 24; + } ]; + firewall = { + extraCommands = '' + # Accept all incoming TCP traffic from bay + iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept + # Accept monitoring requests from hut + iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept + # Accept all Ceph traffic from the local network + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept + ''; + }; + }; + + # Missing service for volumes, see: + # https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/ + systemd.services.ceph-volume = { + enable = true; + description = "Ceph Volume activation"; + unitConfig = { + Type = "oneshot"; + After = "local-fs.target"; + Wants = "local-fs.target"; + }; + path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ]; + serviceConfig = { + KillMode = "none"; + Environment = "CEPH_VOLUME_TIMEOUT=10000"; + ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'"; + TimeoutSec = "0"; + }; + wantedBy = [ 
"multi-user.target" ]; + }; +} diff --git a/m/map.nix b/m/map.nix new file mode 100644 index 0000000..7ebd0bb --- /dev/null +++ b/m/map.nix @@ -0,0 +1,70 @@ +{ + # In physical order from top to bottom (see note below) + ssf = { + # Switches for Ethernet and OmniPath + switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; }; + switch-opa = { pos=41; size=1; }; + + # SSF login + apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; }; + + # Storage + bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; }; + lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; }; + lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; }; + + # Compute xeon + owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; }; + owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; }; + xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; }; + # Slot 34 empty + koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; }; + weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; }; + hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; }; + eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; }; + + # 16 KNL nodes, 4 per chassis + knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; }; + knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; }; + knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; }; + knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; }; + + # Slot 19 empty + + # EPI (hw team, guessed order) + epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; }; + epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; }; + epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; }; + anon = { pos=14; size=2; }; # Unlabeled machine. Operative + + # These are old and decommissioned (off) + power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; }; + powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; }; + gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; }; + odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; }; + amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic + moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; }; + }; + + bsc2218 = { + raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; }; + tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; }; + }; + + upc = { + fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; }; + }; + + # NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the + # bottom. Example: + # + # | ... | - [pos+size] <--- Label in chassis + # +--------+ + # | node | - [pos+1] + # | 2U | - [pos] + # +------- + + # | ... 
| - [pos-1] + # + # NOTE: The board and sn refers to the FRU information (Board Product and + # Board Serial) via `ipmitool fru print 0`. +} diff --git a/m/module/amd-uprof.nix b/m/module/amd-uprof.nix new file mode 100644 index 0000000..7d20a6f --- /dev/null +++ b/m/module/amd-uprof.nix @@ -0,0 +1,49 @@ +{ config, lib, pkgs, ... }: + +{ + options = { + services.amd-uprof = { + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Whether to enable AMD uProf."; + }; + }; + }; + + # Only setup amd-uprof if enabled + config = lib.mkIf config.services.amd-uprof.enable { + + # First make sure that we add the module to the list of available modules + # in the kernel matching the same kernel version of this configuration. + boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ]; + boot.kernelModules = [ "AMDPowerProfiler" ]; + + # Make the userspace tools available in $PATH. + environment.systemPackages = with pkgs; [ amd-uprof ]; + + # The AMDPowerProfiler module doesn't create the /dev device nor it emits + # any uevents, so we cannot use udev rules to automatically create the + # device. Instead, we run a systemd unit that does it after loading the + # modules. + systemd.services.amd-uprof-device = { + description = "Create /dev/AMDPowerProfiler device"; + after = [ "systemd-modules-load.service" ]; + wantedBy = [ "multi-user.target" ]; + unitConfig.ConditionPathExists = [ + "/proc/AMDPowerProfiler/device" + "!/dev/AMDPowerProfiler" + ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" '' + mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0 + ''; + ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" '' + rm -f /dev/AMDPowerProfiler + ''; + }; + }; + }; +} diff --git a/m/module/ceph.nix b/m/module/ceph.nix new file mode 100644 index 0000000..cf1217f --- /dev/null +++ b/m/module/ceph.nix @@ -0,0 +1,24 @@ +{ config, pkgs, ... }: + +# Mounts the /ceph filesystem at boot +{ + environment.systemPackages = with pkgs; [ + ceph-client + fio # For benchmarks + ]; + + # We need the ceph module loaded as the mount.ceph binary fails to run the + # modprobe command. + boot.kernelModules = [ "ceph" ]; + + age.secrets.cephUser.file = ../../secrets/ceph-user.age; + + fileSystems."/ceph" = { + fsType = "ceph"; + device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; + options = [ + "mon_addr=10.0.40.40" + "secretfile=${config.age.secrets.cephUser.path}" + ]; + }; +} diff --git a/m/module/debuginfod.nix b/m/module/debuginfod.nix new file mode 100644 index 0000000..a7dc05d --- /dev/null +++ b/m/module/debuginfod.nix @@ -0,0 +1,3 @@ +{ + services.nixseparatedebuginfod.enable = true; +} diff --git a/m/module/emulation.nix b/m/module/emulation.nix new file mode 100644 index 0000000..ae63970 --- /dev/null +++ b/m/module/emulation.nix @@ -0,0 +1,3 @@ +{ + boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; +} diff --git a/m/module/hut-substituter.nix b/m/module/hut-substituter.nix new file mode 100644 index 0000000..92fda0b --- /dev/null +++ b/m/module/hut-substituter.nix @@ -0,0 +1,13 @@ +{ config, ... 
}: +{ + nix.settings = + # Don't add hut as a cache to itself + assert config.networking.hostName != "hut"; + { + extra-substituters = [ "http://hut/cache" ]; + extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; + + # Set a low timeout in case hut is down + connect-timeout = 3; # seconds + }; +} diff --git a/m/module/jungle-users.nix b/m/module/jungle-users.nix new file mode 100644 index 0000000..9601d29 --- /dev/null +++ b/m/module/jungle-users.nix @@ -0,0 +1,24 @@ +{ config, lib, ... }: + +with lib; + +{ + options = { + users.jungleUsers = mkOption { + type = types.attrsOf (types.anything // { check = (x: x ? "hosts"); }); + description = '' + Same as users.users but with the extra `hosts` attribute, which controls + access to the nodes by `networking.hostName`. + ''; + }; + }; + + config = let + allowedUser = host: userConf: builtins.elem host userConf.hosts; + filterUsers = host: users: filterAttrs (n: v: allowedUser host v) users; + removeHosts = users: mapAttrs (n: v: builtins.removeAttrs v [ "hosts" ]) users; + currentHost = config.networking.hostName; + in { + users.users = removeHosts (filterUsers currentHost config.users.jungleUsers); + }; +} diff --git a/m/module/meteocat-exporter.nix b/m/module/meteocat-exporter.nix new file mode 100644 index 0000000..ffc0338 --- /dev/null +++ b/m/module/meteocat-exporter.nix @@ -0,0 +1,17 @@ +{ config, lib, pkgs, ... }: + +with lib; + +{ + systemd.services."prometheus-meteocat-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter"; + }; + }; +} diff --git a/m/module/monitoring.nix b/m/module/monitoring.nix new file mode 100644 index 0000000..0ef9209 --- /dev/null +++ b/m/module/monitoring.nix @@ -0,0 +1,25 @@ +{ config, lib, ... 
}: + +{ + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only"; + + # Required to allow the smartctl exporter to read the nvme0 character device, + # see the commit message on: + # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80 + services.udev.extraRules = '' + SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk" + ''; + + services.prometheus = { + exporters = { + node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + }; + smartctl.enable = true; + }; + }; +} diff --git a/m/module/nix-daemon-builds.sh b/m/module/nix-daemon-builds.sh new file mode 100755 index 0000000..79ab65c --- /dev/null +++ b/m/module/nix-daemon-builds.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +# Locate nix daemon pid +nd=$(pgrep -o nix-daemon) + +# Locate children of nix-daemon +pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children") + +# For each children, locate 2nd level children +pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n') + +cat </dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.') + user=$(ps -o uname= -p "$pid") + if [ -n "$name" -a -n "$user" ]; then + printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name" + fi +done diff --git a/m/module/nix-daemon-exporter.nix b/m/module/nix-daemon-exporter.nix new file mode 100644 index 0000000..9353fe7 --- /dev/null +++ b/m/module/nix-daemon-exporter.nix @@ -0,0 +1,23 @@ +{ pkgs, config, lib, ... }: +let + script = pkgs.runCommand "nix-daemon-exporter.sh" { } + '' + cp ${./nix-daemon-builds.sh} $out; + chmod +x $out + '' + ; +in +{ + systemd.services.nix-daemon-exporter = { + description = "Daemon to export nix-daemon metrics"; + path = [ pkgs.procps pkgs.ripgrep ]; + wantedBy = [ "default.target" ]; + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}"; + # Needed root to read the environment, potentially unsafe + User = "root"; + Group = "root"; + }; + }; +} diff --git a/m/module/nvidia.nix b/m/module/nvidia.nix new file mode 100644 index 0000000..baebc42 --- /dev/null +++ b/m/module/nvidia.nix @@ -0,0 +1,20 @@ +{ lib, config, pkgs, ... }: +{ + # Configure Nvidia driver to use with CUDA + hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; + hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set"); + hardware.graphics.enable = true; + nixpkgs.config.nvidia.acceptLicense = true; + services.xserver.videoDrivers = [ "nvidia" ]; + + # enable support for derivations which require nvidia-gpu to be available + # > requiredSystemFeatures = [ "cuda" ]; + programs.nix-required-mounts.enable = true; + programs.nix-required-mounts.presets.nvidia-gpu.enable = true; + # They forgot to add the symlink + programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [ + config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument + ]; + + environment.systemPackages = [ pkgs.cudainfo ]; +} diff --git a/m/module/p.nix b/m/module/p.nix new file mode 100644 index 0000000..2005eb8 --- /dev/null +++ b/m/module/p.nix @@ -0,0 +1,68 @@ +{ config, lib, pkgs, ... 
}: + +let + cfg = config.services.p; +in +{ + options = { + services.p = { + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Whether to enable the p service."; + }; + path = lib.mkOption { + type = lib.types.str; + default = "/var/lib/p"; + description = "Where to save the pasted files on disk."; + }; + url = lib.mkOption { + type = lib.types.str; + default = "https://jungle.bsc.es/p"; + description = "URL prefix for the printed file."; + }; + }; + }; + + config = lib.mkIf cfg.enable { + environment.systemPackages = let + p = pkgs.writeShellScriptBin "p" '' + set -e + pastedir="${cfg.path}/$USER" + cd "$pastedir" + + ext="txt" + if [ -n "$1" ]; then + ext="$1" + fi + + out=$(mktemp "XXXXXXXX.$ext") + cat > "$out" + chmod go+r "$out" + echo "${cfg.url}/$USER/$out" + ''; + in [ p ]; + + systemd.services.p = let + # Take only normal users + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + # Create a directory for each user + commands = lib.concatLists (lib.mapAttrsToList (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}" + ]) users); + in { + description = "P service setup"; + requires = [ "network-online.target" ]; + #wants = [ "remote-fs.target" ]; + #after = [ "remote-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + ExecStart = pkgs.writeShellScript "p-init.sh" ('' + + install -d -o root -g root -m 0755 ${cfg.path} + + '' + (lib.concatLines commands)); + }; + }; + }; +} diff --git a/m/module/power-policy.nix b/m/module/power-policy.nix new file mode 100644 index 0000000..61dcd01 --- /dev/null +++ b/m/module/power-policy.nix @@ -0,0 +1,33 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + cfg = config.power.policy; +in +{ + options = { + power.policy = mkOption { + type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]); + default = null; + description = "Set power policy to use via IPMI."; + }; + }; + + config = mkIf (cfg != null) { + systemd.services."power-policy" = { + description = "Set power policy to use via IPMI"; + wantedBy = [ "multi-user.target" ]; + unitConfig = { + StartLimitBurst = "10"; + StartLimitIntervalSec = "10m"; + }; + serviceConfig = { + ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}"; + Type = "oneshot"; + Restart = "on-failure"; + RestartSec = "5s"; + }; + }; + }; +} diff --git a/m/module/slurm-client.nix b/m/module/slurm-client.nix new file mode 100644 index 0000000..deec844 --- /dev/null +++ b/m/module/slurm-client.nix @@ -0,0 +1,24 @@ +{ lib, ... }: + +{ + imports = [ + ./slurm-common.nix + ]; + + systemd.services.slurmd.serviceConfig = { + # Kill all processes in the control group on stop/restart. This will kill + # all the jobs running, so ensure that we only upgrade when the nodes are + # not in use. See: + # https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb + # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 + KillMode = lib.mkForce "control-group"; + + # If slurmd fails to contact the control server it will fail, causing the + # node to remain out of service until manually restarted. Always try to + # restart it. + Restart = "always"; + RestartSec = "30s"; + }; + + services.slurm.client.enable = true; +} diff --git a/m/module/slurm-common.nix b/m/module/slurm-common.nix new file mode 100644 index 0000000..180e2a5 --- /dev/null +++ b/m/module/slurm-common.nix @@ -0,0 +1,115 @@ +{ config, pkgs, ... 
}: + +let + suspendProgram = pkgs.writeShellScript "suspend.sh" '' + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Shutting down host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off + done + ''; + + resumeProgram = pkgs.writeShellScript "resume.sh" '' + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Starting host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on + done + ''; + +in { + services.slurm = { + controlMachine = "apex"; + clusterName = "jungle"; + nodeName = [ + "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" + "fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1" + ]; + + partitionName = [ + "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + "fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" + ]; + + # See slurm.conf(5) for more details about these options. + extraConfig = '' + # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but + # not with Intel MPI. For that use the compatibility shim libpmi.so + # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx + # library in SLURM (--mpi=pmix). See more details here: + # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16 + MpiDefault=pmix + + # When a node reboots return that node to the slurm queue as soon as it + # becomes operative again. + ReturnToService=2 + + # Track all processes by using a cgroup + ProctrackType=proctrack/cgroup + + # Enable task/affinity to allow the jobs to run in a specified subset of + # the resources. Use the task/cgroup plugin to enable process containment. + TaskPlugin=task/affinity,task/cgroup + + # Power off unused nodes until they are requested + SuspendProgram=${suspendProgram} + SuspendTimeout=60 + ResumeProgram=${resumeProgram} + ResumeTimeout=300 + SuspendExcNodes=fox + + # Turn the nodes off after 1 hour of inactivity + SuspendTime=3600 + + # Reduce port range so we can allow only this range in the firewall + SrunPortRange=60000-61000 + + # Use cores as consumable resources. In SLURM terms, a core may have + # multiple hardware threads (or CPUs). + SelectType=select/cons_tres + + # Ignore memory constraints and only use unused cores to share a node with + # other jobs. + SelectTypeParameters=CR_Core + + # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html + # This sets up the "extern" step into which ssh-launched processes will be + # adopted. Alloc runs the prolog at job allocation (salloc) rather than + # when a task runs (srun) so we can ssh early. 
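+      # A typical flow this enables (sketch, assuming an allocation on fox):
+      #   salloc -p fox -N 1    # request the node from the controller (apex)
+      #   ssh fox               # the session is adopted into the job's extern
+      #                         # step and is cleaned up when the job ends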
+ PrologFlags=Alloc,Contain,X11 + + # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes + # adopted by the external step, similar to tasks running in regular steps + # LaunchParameters=ulimit_pam_adopt + SlurmdDebug=debug5 + #DebugFlags=Protocol,Cgroup + ''; + + extraCgroupConfig = '' + CgroupPlugin=cgroup/v2 + #ConstrainCores=yes + ''; + }; + + # Place the slurm config in /etc as this will be required by PAM + environment.etc.slurm.source = config.services.slurm.etcSlurm; + + age.secrets.mungeKey = { + file = ../../secrets/munge-key.age; + owner = "munge"; + group = "munge"; + }; + + services.munge = { + enable = true; + password = config.age.secrets.mungeKey.path; + }; +} diff --git a/m/module/slurm-exporter.nix b/m/module/slurm-exporter.nix new file mode 100644 index 0000000..ad31f45 --- /dev/null +++ b/m/module/slurm-exporter.nix @@ -0,0 +1,28 @@ +{ config, lib, pkgs, ... }: + +# See also: https://github.com/NixOS/nixpkgs/pull/112010 +# And: https://github.com/NixOS/nixpkgs/pull/115839 + +with lib; + +{ + systemd.services."prometheus-slurm-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + ExecStart = '' + ${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341" + ''; + Environment = [ + "PATH=${pkgs.slurm}/bin" + # We need to specify the slurm config to be able to talk to the slurmd + # daemon. + "SLURM_CONF=${config.services.slurm.etcSlurm}/slurm.conf" + ]; + }; + }; +} diff --git a/m/module/slurm-firewall.nix b/m/module/slurm-firewall.nix new file mode 100644 index 0000000..8f52022 --- /dev/null +++ b/m/module/slurm-firewall.nix @@ -0,0 +1,8 @@ +{ ... }: + +{ + networking.firewall = { + # Required for PMIx in SLURM, we should find a better way + allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + }; +} diff --git a/m/module/slurm-hut-nix-store.nix b/m/module/slurm-hut-nix-store.nix new file mode 100644 index 0000000..2ec8e2f --- /dev/null +++ b/m/module/slurm-hut-nix-store.nix @@ -0,0 +1,19 @@ +{ ... }: + +{ + # Mount the hut nix store via NFS + fileSystems."/mnt/hut-nix-store" = { + device = "hut:/nix/store"; + fsType = "nfs"; + options = [ "ro" ]; + }; + + systemd.services.slurmd.serviceConfig = { + # When running a job, bind the hut store in /nix/store so the paths are + # available too. + # FIXME: This doesn't keep the programs in /run/current-system/sw/bin + # available in the store. Ideally they should be merged but the overlay FS + # doesn't work when the underlying directories change. + BindReadOnlyPaths = "/mnt/hut-nix-store:/nix/store"; + }; +} diff --git a/m/module/slurm-server.nix b/m/module/slurm-server.nix new file mode 100644 index 0000000..25fe4f6 --- /dev/null +++ b/m/module/slurm-server.nix @@ -0,0 +1,23 @@ +{ ... 
}: + +{ + imports = [ + ./slurm-common.nix + ]; + + services.slurm.server.enable = true; + + networking.firewall = { + extraCommands = '' + # Accept slurm connections to controller from compute nodes + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept + # Accept slurm connections from compute nodes for srun + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept + + # Accept slurm connections to controller from fox (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept + # Accept slurm connections from fox for srun (via wireguard) + iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept + ''; + }; +} diff --git a/m/module/upc-qaire-exporter.nix b/m/module/upc-qaire-exporter.nix new file mode 100644 index 0000000..ddb27eb --- /dev/null +++ b/m/module/upc-qaire-exporter.nix @@ -0,0 +1,17 @@ +{ config, lib, pkgs, ... }: + +with lib; + +{ + systemd.services."prometheus-upc-qaire-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Restart = mkDefault "always"; + PrivateTmp = mkDefault true; + WorkingDirectory = mkDefault "/tmp"; + DynamicUser = mkDefault true; + ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter"; + }; + }; +} diff --git a/m/module/vpn-dac.nix b/m/module/vpn-dac.nix new file mode 100644 index 0000000..e677c73 --- /dev/null +++ b/m/module/vpn-dac.nix @@ -0,0 +1,35 @@ +{config, ...}: +{ + age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age; + age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age; + + services.openvpn.servers = { + # systemctl status openvpn-dac.service + dac = { + config = '' + client + dev tun + proto tcp + remote vpn.ac.upc.edu 1194 + remote vpn.ac.upc.edu 80 + resolv-retry infinite + nobind + persist-key + persist-tun + ca ${./vpn-dac/ca.crt} + cert ${./vpn-dac/client.crt} + # Only key needs to be secret + key ${config.age.secrets.vpn-dac-client-key.path} + remote-cert-tls server + comp-lzo + verb 3 + auth-user-pass ${config.age.secrets.vpn-dac-login.path} + reneg-sec 0 + + # Only route fox-ipmi + pull-filter ignore "route " + route 147.83.35.27 255.255.255.255 + ''; + }; + }; +} diff --git a/m/module/vpn-dac/ca.crt b/m/module/vpn-dac/ca.crt new file mode 100644 index 0000000..af1427e --- /dev/null +++ b/m/module/vpn-dac/ca.crt @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD +VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx +LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk +MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM +Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw +Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE +BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w +KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi +BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB +QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu +ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC +F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D +6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF +LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9 +V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx +L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH 
+LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q +MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL +MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs +b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu +eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE +AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh +Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF +AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1 +6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/ +oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q ++oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3 +4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z +TQQx5NqM7B5jjak5fauR3/oRD8XXsA== +-----END CERTIFICATE----- diff --git a/m/module/vpn-dac/client.crt b/m/module/vpn-dac/client.crt new file mode 100644 index 0000000..aec0d98 --- /dev/null +++ b/m/module/vpn-dac/client.crt @@ -0,0 +1,100 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 2 (0x2) + Signature Algorithm: sha256WithRSAEncryption + Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu + Validity + Not Before: Jan 12 12:45:41 2016 GMT + Not After : Jan 12 12:45:41 2046 GMT + Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64: + c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6: + 56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7: + 19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58: + 48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83: + dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9: + 2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa: + 6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5: + b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f: + 60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec: + 76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa: + dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d: + 5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb: + 6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e: + fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00: + 17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93: + 71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55: + f9:41 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Basic Constraints: + CA:FALSE + Netscape Comment: + Easy-RSA Generated Certificate + X509v3 Subject Key Identifier: + 1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18 + X509v3 Authority Key Identifier: + keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA + DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu + serial:91:F5:D7:C3:C0:A6:4E:61 + + X509v3 Extended Key Usage: + TLS Web Client Authentication + X509v3 Key Usage: + Digital Signature + X509v3 Subject Alternative Name: + DNS:client + Signature Algorithm: sha256WithRSAEncryption + 42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea: + 3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7: + 26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0: + 31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1: + 01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a: + 
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4: + 02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3: + 40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3: + 4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9: + 1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33: + 75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72: + 1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33: + 87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62: + a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4: + bf:23:f6:7d +-----BEGIN CERTIFICATE----- +MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx +EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK +EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT +G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN +MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X +DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw +EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk +VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB +cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG +A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S +O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL +Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc +n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc +IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl +60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB +o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu +ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD +GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx +CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl +bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1 +bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV +BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA +YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud +DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo +ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx +3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk +g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m +lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck +iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU +CzHGPDS3/FoM5L8j9n0= +-----END CERTIFICATE----- diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix new file mode 100644 index 0000000..e471969 --- /dev/null +++ b/m/owl1/configuration.nix @@ -0,0 +1,28 @@ +{ config, pkgs, ... }: + +{ + imports = [ + ../common/ssf.nix + ../module/ceph.nix + ../module/emulation.nix + ../module/slurm-client.nix + ../module/slurm-firewall.nix + ../module/debuginfod.nix + ../module/hut-substituter.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; + + networking = { + hostName = "owl1"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.1"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.1"; + prefixLength = 24; + } ]; + }; +} diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix new file mode 100644 index 0000000..e28c5e5 --- /dev/null +++ b/m/owl2/configuration.nix @@ -0,0 +1,29 @@ +{ config, pkgs, ... 
}: + +{ + imports = [ + ../common/ssf.nix + ../module/ceph.nix + ../module/emulation.nix + ../module/slurm-client.nix + ../module/slurm-firewall.nix + ../module/debuginfod.nix + ../module/hut-substituter.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; + + networking = { + hostName = "owl2"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.2"; + prefixLength = 24; + } ]; + # Watch out! The OmniPath device is not in the same place here: + interfaces.ibp129s0.ipv4.addresses = [ { + address = "10.0.42.2"; + prefixLength = 24; + } ]; + }; +} diff --git a/m/raccoon/configuration.nix b/m/raccoon/configuration.nix new file mode 100644 index 0000000..38ce719 --- /dev/null +++ b/m/raccoon/configuration.nix @@ -0,0 +1,98 @@ +{ config, pkgs, lib, modulesPath, ... }: + +{ + imports = [ + ../common/base.nix + ../common/ssf/hosts.nix + ../module/emulation.nix + ../module/debuginfod.nix + ../module/nvidia.nix + ../eudy/kernel/perf.nix + ./wireguard.nix + ../module/hut-substituter.nix + ]; + + # Don't install Grub on the disk yet + boot.loader.grub.device = "nodev"; + + # Enable serial console + boot.kernelParams = [ + "console=tty1" + "console=ttyS1,115200" + ]; + + networking = { + hostName = "raccoon"; + # Only BSC DNSs seem to be reachable from the office VLAN + nameservers = [ "84.88.52.35" "84.88.52.36" ]; + defaultGateway = "84.88.51.129"; + interfaces.eno0.ipv4.addresses = [ { + address = "84.88.51.152"; + prefixLength = 25; + } ]; + interfaces.enp5s0f1.ipv4.addresses = [ { + address = "10.0.44.1"; + prefixLength = 24; + } ]; + nat = { + enable = true; + internalInterfaces = [ "enp5s0f1" ]; + externalInterface = "eno0"; + }; + hosts = { + "10.0.44.4" = [ "tent" ]; + "84.88.53.236" = [ "apex" ]; + }; + }; + + # Mount the NFS home + fileSystems."/nfs/home" = { + device = "10.106.0.30:/home"; + fsType = "nfs"; + options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ]; + }; + + # Enable performance governor + powerManagement.cpuFreqGovernor = "performance"; + + hardware.nvidia.open = false; # Maxwell is older than Turing architecture + + services.openssh.settings.X11Forwarding = true; + + services.prometheus.exporters.node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; + + users.motd = '' + ⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⡎⢰⣧⠀⠀⠀⠁⠈⠛⢿⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣠⣴⡦⠶⠟⠓⠚⠻⡄⠀ + ⠀⠀⠀⠀⠀⠀⣧⠀⣱⣀⣰⣧⠀⢀⠀⣘⣿⣿⣦⣶⣄⣠⡀⠀⠀⣀⣀⣤⣴⣄⣀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠿⠏⠁⠀⣀⣠⣶⣿⡶⣿⠀ + ⠀⠀⠀⠀⠀⠀⣹⣆⠘⣿⣿⣿⣇⢸⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣾⣿⣿⣿⣿⣿⣿⣿⣿⣶⣶⣦⡀⣀⣤⣠⣤⡾⠋⠀⢀⣤⣶⣿⣿⣿⣿⣿⣿⣿⡀ + ⠀⠀⠀⠀⠀⠀⠘⢿⡄⢼⣿⣿⣿⣿⣿⡟⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣵⣾⡾⠙⣋⣩⣽⣿⣿⣿⣿⢋⡼⠁ + ⠀⠀⠀⠀⠀⠀⠀⠈⢻⣄⠸⢿⣿⣿⠿⠷⠀⠈⠀⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣾⣿⣿⣿⣿⣿⣿⠇⡼⠁⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢾⣯⡀⠀⢼⡿⠀⠀⠀⢼⠿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⣿⡿⣿⣿⣿⠿⣿⣯⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⢋⡼⠁⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢻⡏⠠⣦⠁⠀⠀⠀⠀⠀⠟⠛⠛⣿⣿⣿⣿⣿⠿⠁⠀⠁⢿⠙⠁⠀⠛⠹⣿⣏⣾⣿⣿⣿⣿⣿⣿⣿⣿⠿⠃⣹⠁⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣘⣧⠀⠙⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⡿⡿⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⢹⣿⠿⢿⣿⣿⣿⣿⣿⠋⢀⡤⠛⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢹⡯⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣿⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠁⠀⢸⣿⣿⣿⠛⠉⠀⣰⠷⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠇⠀⠀⠀⠀⠀⢀⣿⡇⠀⠀⢻⣿⣿⠁⠀⠀⢠⣾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⠟⢿⣿⣄⡀⢸⣿⡀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⠀⠀⠀⢰⣿⣿⡛⣿⣿⡄⢠⡺⠿⡍⠁⢀⣤⣿⣿⣿⠿⣷⣮⣉⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⠀⠀⠈⣧⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⢾⠉⠃⠀⣴⣿⣟⠻⣿⣿⣿⡇⢸⣿⣶⠀⢀⣾⣿⣿⣟⠿⣷⣾⣿⣿⣿⣿⣦⣤⣤⡤⠀⠀⠀⠀⠀⠁⠀⠀⠀⣼⠗⠀⠀⠀⠀ + ⠀⠀⠐⢄⡀⠀⠀⠀⢘⡀⠀⢶⣾⣿⣿⣿⣿⡿⠋⠁⠈⠻⠉⠀⠚⠻⣿⣿⣿⣶⣾⣿⣿⣿⣿⣿⣿⣷⣬⣤⣶⣦⡀⣾⣶⣇⠀⠀⠈⢉⣷⠀⠀⠀⠀ + ⠀⠀⠀⠀⠈⠓⠶⢦⡽⠄⣈⣿⣿⣿⣿⣿⠏⠀⠀⠀⠀⠀⠀⠀⠀⠀⠹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡓⠙⣿⡟⠀⠀⠀⠈⠛⣷⣶⡄⠀ + ⠀⠀⠀⠀⠀⠀⠀⢀⣬⠆⢠⣍⣛⠻⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣉⣀⡀⠀⠀⠈⠛⢿⣦⡀ + ⠐⠒⠒⠶⠶⠶⢦⣬⣟⣥⣀⡉⠛⠻⠶⢁⣤⣾⣿⣿⣿⣷⡄⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣟⡛⠿⠭⠭⠭⠭⠭⠿⠿⠿⢿⣿⣟⠃⠀⠀⠀⠹⣟⠓ + ⠀⣀⣠⠤⠤⢤⣤⣾⣤⡄⣉⣉⣙⣓⡂⣿⣿⣭⣹⣿⣿⣿⣿⡰⣂⣀⢀⠀⠻⣿⠛⠻⠟⠡⣶⣾⣿⣿⣿⣿⣿⣿⣿⡖⠒⠒⠒⠛⠷⢤⡀⢰⣴⣿⡆ + ⠀⠀⠀⢀⣠⡴⠾⠟⠻⣟⡉⠉⠉⠉⢁⢿⣿⣿⣿⣿⣿⣿⡿⣱⣿⣭⡌⠤⠀⠀⠐⣶⣌⡻⣶⣭⡻⢿⣿⣿⣿⣿⣿⣯⣥⣤⣦⠀⠠⣴⣶⣶⣿⡟⢿ + 
⢀⠔⠊⠉⠀⠀⠀⠀⢸⣯⣤⠀⠀⠠⣼⣮⣟⣿⣿⣿⣻⣭⣾⣿⣿⣷⣶⣦⠶⣚⣾⣿⣿⣷⣜⣿⣿⣶⣝⢿⣿⣿⣿⣿⣷⣦⣄⣰⡄⠈⢿⣿⡿⣇⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠈⢡⢇⠀⠀⣠⣿⣿⣿⣯⣟⣛⣛⣛⣛⣛⣩⣭⣴⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣻⣿⣧⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠏⠀⢹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣍⣿⣿⣿⣿⡄⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣾⡁⢈⣾⣿⡿⠛⣛⣿⣿⣿⣿ DO YOU BRING FEEDS? ⣿⣿⣿⣿⣿⣿⡏⠈⠙⠈⠁⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⡿⠛⠉⣽⣿⣷⣾⡿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠌⠛⠉⠀⠁⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠹⠋⠀⢻⣿⣿⣿⣿⠿⢿⣿⣿⣿⣿⣿⣿⠿⣿⣿⣿⣿⠿⠛⠋⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠉⠁⠀⠀⠀⠀⠀⠈⠉⠉⠀⠀⠈⠋⠉⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ + ''; +} diff --git a/m/raccoon/wireguard.nix b/m/raccoon/wireguard.nix new file mode 100644 index 0000000..daf4883 --- /dev/null +++ b/m/raccoon/wireguard.nix @@ -0,0 +1,48 @@ +{ config, pkgs, ... }: + +{ + networking.nat = { + enable = true; + enableIPv6 = false; + externalInterface = "eno0"; + internalInterfaces = [ "wg0" ]; + }; + + networking.firewall = { + allowedUDPPorts = [ 666 ]; + }; + + age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age; + + # Enable WireGuard + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + wg0 = { + ips = [ "10.106.0.236/24" ]; + listenPort = 666; + privateKeyFile = config.age.secrets.wgRaccoon.path; + # Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI= + peers = [ + { + name = "fox"; + publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y="; + allowedIPs = [ "10.106.0.1/32" ]; + endpoint = "fox.ac.upc.edu:666"; + persistentKeepalive = 25; + } + { + name = "apex"; + publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA="; + allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ]; + endpoint = "ssfhead.bsc.es:666"; + persistentKeepalive = 25; + } + ]; + }; + }; + + networking.hosts = { + "10.106.0.1" = [ "fox.wg" ]; + "10.106.0.30" = [ "apex.wg" ]; + }; +} diff --git a/m/tent/blackbox.yml b/m/tent/blackbox.yml new file mode 100644 index 0000000..ccd701e --- /dev/null +++ b/m/tent/blackbox.yml @@ -0,0 +1,14 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + preferred_ip_protocol: "ip4" + follow_redirects: true + valid_status_codes: [] # Defaults to 2xx + method: GET + icmp: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" diff --git a/m/tent/configuration.nix b/m/tent/configuration.nix new file mode 100644 index 0000000..a165b6b --- /dev/null +++ b/m/tent/configuration.nix @@ -0,0 +1,85 @@ +{ config, pkgs, lib, ... 
}: + +{ + imports = [ + ../common/xeon.nix + ../common/ssf/hosts.nix + ../module/emulation.nix + ../module/debuginfod.nix + ./monitoring.nix + ./nginx.nix + ./nix-serve.nix + ./gitlab-runner.nix + ./gitea.nix + ../hut/public-inbox.nix + ../hut/msmtp.nix + ../module/p.nix + ../module/vpn-dac.nix + ../module/hut-substituter.nix + ]; + + # Select the this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675"; + + networking = { + hostName = "tent"; + interfaces.eno1.ipv4.addresses = [ + { + address = "10.0.44.4"; + prefixLength = 24; + } + ]; + + # Only BSC DNSs seem to be reachable from the office VLAN + nameservers = [ "84.88.52.35" "84.88.52.36" ]; + search = [ "bsc.es" "ac.upc.edu" ]; + defaultGateway = "10.0.44.1"; + hosts = { + "84.88.53.236" = [ "apex" ]; + "10.0.44.1" = [ "raccoon" ]; + }; + }; + + services.p.enable = true; + + services.prometheus.exporters.node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; + + boot.swraid = { + enable = true; + mdadmConf = '' + DEVICE partitions + ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d + MAILADDR root + ''; + }; + + fileSystems."/vault" = { + device = "/dev/disk/by-label/vault"; + fsType = "ext4"; + }; + + # Make a /vault/$USER directory for each user. + systemd.services.create-vault-dirs = let + # Take only normal users in tent + users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; + commands = lib.concatLists (lib.mapAttrsToList + (_: user: [ + "install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}" + ]) users); + script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands); + in { + enable = true; + wants = [ "local-fs.target" ]; + after = [ "local-fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.ExecStart = script; + }; + + # disable automatic garbage collector + nix.gc.automatic = lib.mkForce false; +} diff --git a/m/tent/gitea.nix b/m/tent/gitea.nix new file mode 100644 index 0000000..546ac5f --- /dev/null +++ b/m/tent/gitea.nix @@ -0,0 +1,30 @@ +{ config, lib, ... }: +{ + services.gitea = { + enable = true; + appName = "Gitea in the jungle"; + + settings = { + server = { + ROOT_URL = "https://jungle.bsc.es/git/"; + LOCAL_ROOT_URL = "https://jungle.bsc.es/git/"; + LANDING_PAGE = "explore"; + }; + metrics.ENABLED = true; + service = { + DISABLE_REGISTRATION = true; + REGISTER_MANUAL_CONFIRM = true; + ENABLE_NOTIFY_MAIL = true; + }; + log.LEVEL = "Warn"; + + mailer = { + ENABLED = true; + FROM = "jungle-robot@bsc.es"; + PROTOCOL = "sendmail"; + SENDMAIL_PATH = "/run/wrappers/bin/sendmail"; + SENDMAIL_ARGS = "--"; + }; + }; + }; +} diff --git a/m/tent/gitlab-runner.nix b/m/tent/gitlab-runner.nix new file mode 100644 index 0000000..166b8ca --- /dev/null +++ b/m/tent/gitlab-runner.nix @@ -0,0 +1,93 @@ +{ pkgs, lib, config, ... 
}: + +{ + age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age; + age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age; + age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age; + + services.gitlab-runner = let sec = config.age.secrets; in { + enable = true; + settings.concurrent = 5; + services = { + # For gitlab.pm.bsc.es + gitlab-pm-shell = { + executor = "shell"; + environmentVariables = { + SHELL = "${pkgs.bash}/bin/bash"; + }; + authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path; + preGetSourcesScript = pkgs.writeScript "setup" '' + echo "This is the preGetSources script running, brace for impact" + env + ''; + }; + gitlab-pm-docker = { + authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path; + executor = "docker"; + dockerImage = "debian:stable"; + }; + + # For gitlab.bsc.es + gitlab-bsc-docker = { + # gitlab.bsc.es still uses the old token mechanism + registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path; + tagList = [ "docker" "tent" "nix" ]; + executor = "docker"; + dockerImage = "alpine"; + dockerVolumes = [ + "/nix/store:/nix/store:ro" + "/nix/var/nix/db:/nix/var/nix/db:ro" + "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" + ]; + dockerDisableCache = true; + registrationFlags = [ + # Increase build log length to 64 MiB + "--output-limit 65536" + ]; + preBuildScript = pkgs.writeScript "setup-container" '' + mkdir -p -m 0755 /nix/var/log/nix/drvs + mkdir -p -m 0755 /nix/var/nix/gcroots + mkdir -p -m 0755 /nix/var/nix/profiles + mkdir -p -m 0755 /nix/var/nix/temproots + mkdir -p -m 0755 /nix/var/nix/userpool + mkdir -p -m 1777 /nix/var/nix/gcroots/per-user + mkdir -p -m 1777 /nix/var/nix/profiles/per-user + mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root + mkdir -p -m 0700 "$HOME/.nix-defexpr" + mkdir -p -m 0700 "$HOME/.ssh" + cat >> "$HOME/.ssh/known_hosts" << EOF + bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT + gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 + EOF + . ${pkgs.nix}/etc/profile.d/nix-daemon.sh + # Required to load SSL certificate paths + . ${pkgs.cacert}/nix-support/setup-hook + ''; + environmentVariables = { + ENV = "/etc/profile"; + USER = "root"; + NIX_REMOTE = "daemon"; + PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; + }; + }; + }; + }; + + systemd.services.gitlab-runner.serviceConfig = { + DynamicUser = lib.mkForce false; + User = "gitlab-runner"; + Group = "gitlab-runner"; + ExecStart = lib.mkForce + ''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}''; + }; + + users.users.gitlab-runner = { + uid = config.ids.uids.gitlab-runner; + home = "/var/lib/gitlab-runner"; + description = "Gitlab Runner"; + group = "gitlab-runner"; + extraGroups = [ "docker" ]; + createHome = true; + }; + users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner; +} diff --git a/m/tent/monitoring.nix b/m/tent/monitoring.nix new file mode 100644 index 0000000..c241806 --- /dev/null +++ b/m/tent/monitoring.nix @@ -0,0 +1,217 @@ +{ config, lib, pkgs, ... 
}: + +{ + imports = [ + ../module/meteocat-exporter.nix + ../module/upc-qaire-exporter.nix + ../module/nix-daemon-exporter.nix + ]; + + age.secrets.grafanaJungleRobotPassword = { + file = ../../secrets/jungle-robot-password.age; + owner = "grafana"; + mode = "400"; + }; + + services.grafana = { + enable = true; + settings = { + server = { + domain = "jungle.bsc.es"; + root_url = "%(protocol)s://%(domain)s/grafana"; + serve_from_sub_path = true; + http_port = 2342; + http_addr = "127.0.0.1"; + }; + smtp = { + enabled = true; + from_address = "jungle-robot@bsc.es"; + user = "jungle-robot"; + # Read the password from a file, which is only readable by grafana user + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider + password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}"; + host = "mail.bsc.es:465"; + startTLS_policy = "NoStartTLS"; + }; + feature_toggles.publicDashboards = true; + "auth.anonymous".enabled = true; + log.level = "warn"; + }; + }; + + services.prometheus = { + enable = true; + port = 9001; + retentionTime = "5y"; + listenAddress = "127.0.0.1"; + }; + + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only"; + + # Credentials for IPMI exporter + age.secrets.ipmiYml = { + file = ../../secrets/ipmi.yml.age; + owner = "ipmi-exporter"; + }; + + # Create an IPMI group and assign the ipmi0 device + users.groups.ipmi = {}; + services.udev.extraRules = '' + SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660" + ''; + + # Add a new ipmi-exporter user that can read the ipmi0 device + users.users.ipmi-exporter = { + isSystemUser = true; + group = "ipmi"; + }; + + # Disable dynamic user so we have the ipmi-exporter user available for the credentials + systemd.services.prometheus-ipmi-exporter.serviceConfig = { + DynamicUser = lib.mkForce false; + PrivateDevices = lib.mkForce false; + User = lib.mkForce "ipmi-exporter"; + Group = lib.mkForce "ipmi"; + RestrictNamespaces = lib.mkForce false; + # Fake uid to 0 so it shuts up + ExecStart = let + cfg = config.services.prometheus.exporters.ipmi; + in lib.mkForce (lib.concatStringsSep " " ([ + "${pkgs.util-linux}/bin/unshare --map-user 0" + "${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter" + "--web.listen-address ${cfg.listenAddress}:${toString cfg.port}" + "--config.file ${lib.escapeShellArg cfg.configFile}" + ] ++ cfg.extraFlags)); + }; + + services.prometheus = { + exporters = { + ipmi = { + enable = true; + configFile = config.age.secrets.ipmiYml.path; + #extraFlags = [ "--log.level=debug" ]; + listenAddress = "127.0.0.1"; + }; + node = { + enable = true; + enabledCollectors = [ "logind" ]; + port = 9002; + listenAddress = "127.0.0.1"; + }; + blackbox = { + enable = true; + listenAddress = "127.0.0.1"; + configFile = ./blackbox.yml; + }; + }; + + scrapeConfigs = [ + { + job_name = "local"; + static_configs = [{ + targets = [ + "127.0.0.1:9002" # Node exporter + #"127.0.0.1:9115" # Blackbox exporter + "127.0.0.1:9290" # IPMI exporter for local node + "127.0.0.1:9928" # UPC Qaire custom exporter + "127.0.0.1:9929" # Meteocat custom exporter + "127.0.0.1:9999" # Nix-daemon custom exporter + ]; + }]; + } + { + job_name = "blackbox-http"; + metrics_path = "/probe"; + params = { module = [ "http_2xx" ]; }; + static_configs = [{ + targets = [ + "https://www.google.com/robots.txt" + 
"https://pm.bsc.es/" + "https://pm.bsc.es/gitlab/" + "https://jungle.bsc.es/" + "https://gitlab.bsc.es/" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:9115"; + } + ]; + } + { + job_name = "blackbox-icmp"; + metrics_path = "/probe"; + params = { module = [ "icmp" ]; }; + static_configs = [{ + targets = [ + "1.1.1.1" + "8.8.8.8" + "ssfhead" + "raccoon" + "anella-bsc.cesca.cat" + "upc-anella.cesca.cat" + "fox.ac.upc.edu" + "fox-ipmi.ac.upc.edu" + "arenys5.ac.upc.edu" + "arenys0-2.ac.upc.edu" + "epi01.bsc.es" + "axle.bsc.es" + ]; + }]; + relabel_configs = [ + { + # Takes the address and sets it in the "target=" URL parameter + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + # Sets the "instance" label with the remote host we are querying + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + # Shows the host target address instead of the blackbox address + target_label = "__address__"; + replacement = "127.0.0.1:9115"; + } + ]; + } + { + job_name = "ipmi-raccoon"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9290" ]; } + ]; + params = { + target = [ "raccoon-ipmi" ]; + module = [ "raccoon" ]; + }; + } + { + job_name = "ipmi-fox"; + metrics_path = "/ipmi"; + static_configs = [ + { targets = [ "127.0.0.1:9290" ]; } + ]; + params = { + target = [ "fox-ipmi.ac.upc.edu" ]; + module = [ "fox" ]; + }; + } + ]; + }; +} diff --git a/m/tent/nginx.nix b/m/tent/nginx.nix new file mode 100644 index 0000000..4568690 --- /dev/null +++ b/m/tent/nginx.nix @@ -0,0 +1,79 @@ +{ theFlake, pkgs, ... 
}: +let + website = pkgs.stdenv.mkDerivation { + name = "jungle-web"; + src = pkgs.fetchgit { + url = "https://jungle.bsc.es/git/rarias/jungle-website.git"; + rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1"; + hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4="; + }; + buildInputs = [ pkgs.hugo ]; + buildPhase = '' + rm -rf public/ + hugo + ''; + installPhase = '' + cp -r public $out + ''; + # Don't mess doc/ + dontFixup = true; + }; +in +{ + networking.firewall.allowedTCPPorts = [ 80 ]; + services.nginx = { + enable = true; + virtualHosts."jungle.bsc.es" = { + root = "${website}"; + listen = [ + { + addr = "0.0.0.0"; + port = 80; + } + ]; + extraConfig = '' + set_real_ip_from 127.0.0.1; + set_real_ip_from 84.88.52.107; + real_ip_recursive on; + real_ip_header X-Forwarded-For; + + location /git { + rewrite ^/git$ / break; + rewrite ^/git/(.*) /$1 break; + proxy_pass http://127.0.0.1:3000; + proxy_redirect http:// $scheme://; + } + location /cache { + rewrite ^/cache/(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_redirect http:// $scheme://; + } + location /lists { + proxy_pass http://127.0.0.1:8081; + proxy_redirect http:// $scheme://; + } + location /grafana { + proxy_pass http://127.0.0.1:2342; + proxy_redirect http:// $scheme://; + proxy_set_header Host $host; + # Websockets + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + location ~ ^/~(.+?)(/.*)?$ { + alias /vault/home/$1/public_html$2; + index index.html index.htm; + autoindex on; + absolute_redirect off; + } + location /p/ { + alias /var/lib/p/; + } + location /pub/ { + alias /vault/pub/; + } + ''; + }; + }; +} diff --git a/m/tent/nix-serve.nix b/m/tent/nix-serve.nix new file mode 100644 index 0000000..35ccd72 --- /dev/null +++ b/m/tent/nix-serve.nix @@ -0,0 +1,16 @@ +{ config, ... }: + +{ + age.secrets.nixServe.file = ../../secrets/nix-serve.age; + + services.nix-serve = { + enable = true; + # Only listen locally, as we serve it via ssh + bindAddress = "127.0.0.1"; + port = 5000; + + secretKeyFile = config.age.secrets.nixServe.path; + # Public key: + # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0= + }; +} diff --git a/m/weasel/configuration.nix b/m/weasel/configuration.nix new file mode 100644 index 0000000..995d0fd --- /dev/null +++ b/m/weasel/configuration.nix @@ -0,0 +1,33 @@ +{ lib, ... 
}: + +{ + imports = [ + ../common/ssf.nix + ../module/hut-substituter.nix + ]; + + # Select this using the ID to avoid mismatches + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca"; + + # No swap, there is plenty of RAM + swapDevices = lib.mkForce []; + + # Users with sudo access + users.groups.wheel.members = [ "abonerib" "anavarro" ]; + + # Run julia installed with juliaup using julia's own libraries: + # NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia + programs.nix-ld.enable = true; + + networking = { + hostName = "weasel"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.6"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.6"; + prefixLength = 24; + } ]; + }; +} diff --git a/nixos-config.nix b/nixos-config.nix new file mode 100644 index 0000000..2e36516 --- /dev/null +++ b/nixos-config.nix @@ -0,0 +1 @@ +(builtins.getFlake (toString ./.)).nixosConfigurations diff --git a/overlay.nix b/overlay.nix index 4831c55..df21edf 100644 --- a/overlay.nix +++ b/overlay.nix @@ -6,7 +6,12 @@ with final.lib; let callPackage = final.callPackage; - bscPkgs = { + mkStrict = drv: if (isDerivation drv && drv ? overrideAttrs && !(drv ? strictDeps)) + then drv.overrideAttrs { strictDeps = true; } + else drv; + + bscPkgs = mapAttrs (_: mkStrict) { + amd-uprof = prev.callPackage ./pkgs/amd-uprof/default.nix { }; bench6 = callPackage ./pkgs/bench6/default.nix { }; bigotes = callPackage ./pkgs/bigotes/default.nix { }; clangOmpss2 = callPackage ./pkgs/llvm-ompss2/default.nix { }; @@ -14,12 +19,24 @@ let clangOmpss2Nodes = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmp; }; clangOmpss2NodesOmpv = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmpv; }; clangOmpss2Unwrapped = callPackage ./pkgs/llvm-ompss2/clang.nix { }; + cudainfo = prev.callPackage ./pkgs/cudainfo/default.nix { }; #extrae = callPackage ./pkgs/extrae/default.nix { }; # Broken and outdated gpi-2 = callPackage ./pkgs/gpi-2/default.nix { }; intelPackages_2023 = callPackage ./pkgs/intel-oneapi/2023.nix { }; jemallocNanos6 = callPackage ./pkgs/nanos6/jemalloc.nix { }; + # FIXME: Extend this to all linuxPackages variants. 
Open problem, see: + # https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636 + linuxPackages = prev.linuxPackages.extend (_final: _prev: { + amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { }; + }); + linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: { + amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { }; + }); lmbench = callPackage ./pkgs/lmbench/default.nix { }; mcxx = callPackage ./pkgs/mcxx/default.nix { }; + meteocat-exporter = prev.callPackage ./pkgs/meteocat-exporter/default.nix { }; + mpi = final.mpich; # Set MPICH as default + mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; }; nanos6 = callPackage ./pkgs/nanos6/default.nix { }; nanos6Debug = final.nanos6.override { enableDebug = true; }; nixtools = callPackage ./pkgs/nixtools/default.nix { }; @@ -34,6 +51,7 @@ let ovni = callPackage ./pkgs/ovni/default.nix { }; ovniGit = final.ovni.override { useGit = true; }; paraverKernel = callPackage ./pkgs/paraver/kernel.nix { }; + prometheus-slurm-exporter = prev.callPackage ./pkgs/slurm-exporter/default.nix { }; #pscom = callPackage ./pkgs/parastation/pscom.nix { }; # Unmaintaned #psmpi = callPackage ./pkgs/parastation/psmpi.nix { }; # Unmaintaned sonar = callPackage ./pkgs/sonar/default.nix { }; @@ -43,6 +61,7 @@ let stdenvClangOmpss2NodesOmpv = final.stdenv.override { cc = final.clangOmpss2NodesOmpv; allowedRequisites = null; }; tagaspi = callPackage ./pkgs/tagaspi/default.nix { }; tampi = callPackage ./pkgs/tampi/default.nix { }; + upc-qaire-exporter = prev.callPackage ./pkgs/upc-qaire-exporter/default.nix { }; wxparaver = callPackage ./pkgs/paraver/default.nix { }; }; diff --git a/pkgs/amd-uprof/default.nix b/pkgs/amd-uprof/default.nix new file mode 100644 index 0000000..9eb6707 --- /dev/null +++ b/pkgs/amd-uprof/default.nix @@ -0,0 +1,89 @@ +{ stdenv +, lib +, curl +, cacert +, runCommandLocal +, autoPatchelfHook +, elfutils +, glib +, libGL +, ncurses5 +, xorg +, zlib +, libxkbcommon +, freetype +, fontconfig +, libGLU +, dbus +, rocmPackages +, libxcrypt-legacy +, numactl +, radare2 +}: + +let + version = "5.1.701"; + tarball = "AMDuProf_Linux_x64_${version}.tar.bz2"; + + # NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes. 
+ uprofSrc = runCommandLocal tarball { + nativeBuildInputs = [ curl ]; + outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk="; + SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"; + } '' + curl \ + -o $out \ + 'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \ + -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \ + -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \ + -H 'Accept-Language: en-US,en;q=0.5' \ + -H 'Accept-Encoding: gzip, deflate, br, zstd' \ + -H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n' + ''; + +in + stdenv.mkDerivation { + pname = "AMD-uProf"; + inherit version; + src = uprofSrc; + dontStrip = true; + phases = [ "installPhase" "fixupPhase" ]; + nativeBuildInputs = [ autoPatchelfHook radare2 ]; + buildInputs = [ + stdenv.cc.cc.lib + ncurses5 + elfutils + glib + libGL + libGLU + libxcrypt-legacy + xorg.libX11 + xorg.libXext + xorg.libXi + xorg.libXmu + xorg.libxcb + xorg.xcbutilwm + xorg.xcbutilrenderutil + xorg.xcbutilkeysyms + xorg.xcbutilimage + fontconfig.lib + libxkbcommon + zlib + freetype + dbus + rocmPackages.rocprofiler + numactl + ]; + installPhase = '' + set -x + mkdir -p $out + tar -x -v -C $out --strip-components=1 -f $src + rm $out/bin/AMDPowerProfilerDriverSource.tar.gz + patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so + patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys + echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c - + radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm + patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm + set +x + ''; + } diff --git a/pkgs/amd-uprof/driver.nix b/pkgs/amd-uprof/driver.nix new file mode 100644 index 0000000..e69ef2c --- /dev/null +++ b/pkgs/amd-uprof/driver.nix @@ -0,0 +1,33 @@ +{ stdenv +, lib +, amd-uprof +, kernel +, runCommandLocal +}: + +let + version = amd-uprof.version; + tarball = amd-uprof.src; +in stdenv.mkDerivation { + pname = "AMDPowerProfilerDriver"; + inherit version; + src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } '' + set -x + tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz + mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out + set +x + ''; + hardeningDisable = [ "pic" "format" ]; + nativeBuildInputs = kernel.moduleBuildDependencies; + patches = [ ./makefile.patch ./hrtimer.patch ]; + makeFlags = [ + "KERNEL_VERSION=${kernel.modDirVersion}" + "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" + "INSTALL_MOD_PATH=$(out)" + ]; + meta = { + description = "AMD Power Profiler Driver"; + homepage = "https://www.amd.com/es/developer/uprof.html"; + platforms = lib.platforms.linux; + }; +} diff --git a/pkgs/amd-uprof/hrtimer.patch b/pkgs/amd-uprof/hrtimer.patch new file mode 100644 index 0000000..24befa3 --- /dev/null +++ b/pkgs/amd-uprof/hrtimer.patch @@ -0,0 +1,31 @@ +--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200 ++++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200 +@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo) + + DRVPRINT("pTimerConfig(%p)", pTimerConfig); + +- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); ++ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + } + + int PmcSetupTimer(ClientContext* pClientCtx) +@@ -157,7 +157,6 @@ int 
PmcSetupTimer(ClientContext* pClient + { + /* Interval in ms */ + pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000); +- pTimerConfig->m_hrTimer.function = PmcTimerCallback; + + DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time); + } +--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200 ++++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200 +@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu) + pCoreClientData = &per_cpu(g_coreClientData, cpu); + + // initialize HR timer +- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); +- pCoreClientData->m_hrTimer.function = &HrTimerCallback; ++ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + + return; + } // InitHrTimer diff --git a/pkgs/amd-uprof/libnuma.r2 b/pkgs/amd-uprof/libnuma.r2 new file mode 100644 index 0000000..77cbabc --- /dev/null +++ b/pkgs/amd-uprof/libnuma.r2 @@ -0,0 +1,10 @@ +# Patch arguments to call sym std::string::find(char const*, unsigned long, unsigned long) +# so it matches NixOS: +# +# Change OS name to NixOS +wz NixOS @ 0x00550a43 +# And set the length to 5 characters +wa mov ecx, 5 @0x00517930 +# +# Then change the argument to dlopen() so it only uses libnuma.so +wz libnuma.so @ 0x00562940 diff --git a/pkgs/amd-uprof/makefile.patch b/pkgs/amd-uprof/makefile.patch new file mode 100644 index 0000000..7e36cee --- /dev/null +++ b/pkgs/amd-uprof/makefile.patch @@ -0,0 +1,66 @@ +--- a/Makefile 2025-06-19 20:36:49.346693267 +0200 ++++ b/Makefile 2025-06-19 20:42:29.778088660 +0200 +@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi + MODULE_NAME_KO=$(MODULE_NAME).ko + + # check is module inserted +-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME)) ++#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME)) + + # check pcore dkms status + PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION)) +@@ -50,7 +50,7 @@ endif + # “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00 + G_VERSION=9 + K_VERSION=5 +-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.) ++KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.) + GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.) + ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION)))) + ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION)))) +@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe + + # make + all: +- @chmod a+x ./AMDPPcert.sh +- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status; +- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS1 -eq 1 ]; then \ +- exit 1; \ +- fi +- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules +- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS3 -eq 0 ]; then \ +- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \ +- fi ++ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules + + # make clean + clean: +@@ -84,23 +74,9 @@ clean: + + # make install + install: +- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra +- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO) +- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/ +- @depmod -a +- @if [ ! 
-z "$(MODPROBE_OUTPUT)" ]; then \ +- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\ +- rmmod $(MODULE_NAME);\ +- fi +- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \ +- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \ +- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \ +- if [ $$SIGSTATUS1 -eq 0 ]; then \ +- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \ +- exit 1; \ +- else \ +- cat $(PWD)/sign_status1; \ +- fi ++ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/ ++ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/ ++ + # make dkms + dkms: + @chmod a+x ./AMDPPcert.sh diff --git a/pkgs/cudainfo/Makefile b/pkgs/cudainfo/Makefile new file mode 100644 index 0000000..5990eba --- /dev/null +++ b/pkgs/cudainfo/Makefile @@ -0,0 +1,12 @@ +HOSTCXX ?= g++ +NVCC := nvcc -ccbin $(HOSTCXX) +CXXFLAGS := -m64 + +# Target rules +all: cudainfo + +cudainfo: cudainfo.cpp + $(NVCC) $(CXXFLAGS) -o $@ $< + +clean: + rm -f cudainfo cudainfo.o diff --git a/pkgs/cudainfo/cudainfo.cpp b/pkgs/cudainfo/cudainfo.cpp new file mode 100644 index 0000000..815500b --- /dev/null +++ b/pkgs/cudainfo/cudainfo.cpp @@ -0,0 +1,600 @@ +/* + * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ +/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. 
*/ + +// Shared Utilities (QA Testing) + +// std::system includes +#include +#include + +#include + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) + +// CUDA Runtime error messages +#ifdef __DRIVER_TYPES_H__ +static const char *_cudaGetErrorEnum(cudaError_t error) +{ + switch (error) + { + case cudaSuccess: + return "cudaSuccess"; + + case cudaErrorMissingConfiguration: + return "cudaErrorMissingConfiguration"; + + case cudaErrorMemoryAllocation: + return "cudaErrorMemoryAllocation"; + + case cudaErrorInitializationError: + return "cudaErrorInitializationError"; + + case cudaErrorLaunchFailure: + return "cudaErrorLaunchFailure"; + + case cudaErrorPriorLaunchFailure: + return "cudaErrorPriorLaunchFailure"; + + case cudaErrorLaunchTimeout: + return "cudaErrorLaunchTimeout"; + + case cudaErrorLaunchOutOfResources: + return "cudaErrorLaunchOutOfResources"; + + case cudaErrorInvalidDeviceFunction: + return "cudaErrorInvalidDeviceFunction"; + + case cudaErrorInvalidConfiguration: + return "cudaErrorInvalidConfiguration"; + + case cudaErrorInvalidDevice: + return "cudaErrorInvalidDevice"; + + case cudaErrorInvalidValue: + return "cudaErrorInvalidValue"; + + case cudaErrorInvalidPitchValue: + return "cudaErrorInvalidPitchValue"; + + case cudaErrorInvalidSymbol: + return "cudaErrorInvalidSymbol"; + + case cudaErrorMapBufferObjectFailed: + return "cudaErrorMapBufferObjectFailed"; + + case cudaErrorUnmapBufferObjectFailed: + return "cudaErrorUnmapBufferObjectFailed"; + + case cudaErrorInvalidHostPointer: + return "cudaErrorInvalidHostPointer"; + + case cudaErrorInvalidDevicePointer: + return "cudaErrorInvalidDevicePointer"; + + case cudaErrorInvalidTexture: + return "cudaErrorInvalidTexture"; + + case cudaErrorInvalidTextureBinding: + return "cudaErrorInvalidTextureBinding"; + + case cudaErrorInvalidChannelDescriptor: + return "cudaErrorInvalidChannelDescriptor"; + + case cudaErrorInvalidMemcpyDirection: + return "cudaErrorInvalidMemcpyDirection"; + + case cudaErrorAddressOfConstant: + return "cudaErrorAddressOfConstant"; + + case cudaErrorTextureFetchFailed: + return "cudaErrorTextureFetchFailed"; + + case cudaErrorTextureNotBound: + return "cudaErrorTextureNotBound"; + + case cudaErrorSynchronizationError: + return "cudaErrorSynchronizationError"; + + case cudaErrorInvalidFilterSetting: + return "cudaErrorInvalidFilterSetting"; + + case cudaErrorInvalidNormSetting: + return "cudaErrorInvalidNormSetting"; + + case cudaErrorMixedDeviceExecution: + return "cudaErrorMixedDeviceExecution"; + + case cudaErrorCudartUnloading: + return "cudaErrorCudartUnloading"; + + case cudaErrorUnknown: + return "cudaErrorUnknown"; + + case cudaErrorNotYetImplemented: + return "cudaErrorNotYetImplemented"; + + case cudaErrorMemoryValueTooLarge: + return "cudaErrorMemoryValueTooLarge"; + + case cudaErrorInvalidResourceHandle: + return "cudaErrorInvalidResourceHandle"; + + case cudaErrorNotReady: + return "cudaErrorNotReady"; + + case cudaErrorInsufficientDriver: + return "cudaErrorInsufficientDriver"; + + case cudaErrorSetOnActiveProcess: + return "cudaErrorSetOnActiveProcess"; + + case cudaErrorInvalidSurface: + return "cudaErrorInvalidSurface"; + + case cudaErrorNoDevice: + return "cudaErrorNoDevice"; + + case cudaErrorECCUncorrectable: + return "cudaErrorECCUncorrectable"; + + case cudaErrorSharedObjectSymbolNotFound: + return "cudaErrorSharedObjectSymbolNotFound"; + + case 
cudaErrorSharedObjectInitFailed: + return "cudaErrorSharedObjectInitFailed"; + + case cudaErrorUnsupportedLimit: + return "cudaErrorUnsupportedLimit"; + + case cudaErrorDuplicateVariableName: + return "cudaErrorDuplicateVariableName"; + + case cudaErrorDuplicateTextureName: + return "cudaErrorDuplicateTextureName"; + + case cudaErrorDuplicateSurfaceName: + return "cudaErrorDuplicateSurfaceName"; + + case cudaErrorDevicesUnavailable: + return "cudaErrorDevicesUnavailable"; + + case cudaErrorInvalidKernelImage: + return "cudaErrorInvalidKernelImage"; + + case cudaErrorNoKernelImageForDevice: + return "cudaErrorNoKernelImageForDevice"; + + case cudaErrorIncompatibleDriverContext: + return "cudaErrorIncompatibleDriverContext"; + + case cudaErrorPeerAccessAlreadyEnabled: + return "cudaErrorPeerAccessAlreadyEnabled"; + + case cudaErrorPeerAccessNotEnabled: + return "cudaErrorPeerAccessNotEnabled"; + + case cudaErrorDeviceAlreadyInUse: + return "cudaErrorDeviceAlreadyInUse"; + + case cudaErrorProfilerDisabled: + return "cudaErrorProfilerDisabled"; + + case cudaErrorProfilerNotInitialized: + return "cudaErrorProfilerNotInitialized"; + + case cudaErrorProfilerAlreadyStarted: + return "cudaErrorProfilerAlreadyStarted"; + + case cudaErrorProfilerAlreadyStopped: + return "cudaErrorProfilerAlreadyStopped"; + + /* Since CUDA 4.0*/ + case cudaErrorAssert: + return "cudaErrorAssert"; + + case cudaErrorTooManyPeers: + return "cudaErrorTooManyPeers"; + + case cudaErrorHostMemoryAlreadyRegistered: + return "cudaErrorHostMemoryAlreadyRegistered"; + + case cudaErrorHostMemoryNotRegistered: + return "cudaErrorHostMemoryNotRegistered"; + + /* Since CUDA 5.0 */ + case cudaErrorOperatingSystem: + return "cudaErrorOperatingSystem"; + + case cudaErrorPeerAccessUnsupported: + return "cudaErrorPeerAccessUnsupported"; + + case cudaErrorLaunchMaxDepthExceeded: + return "cudaErrorLaunchMaxDepthExceeded"; + + case cudaErrorLaunchFileScopedTex: + return "cudaErrorLaunchFileScopedTex"; + + case cudaErrorLaunchFileScopedSurf: + return "cudaErrorLaunchFileScopedSurf"; + + case cudaErrorSyncDepthExceeded: + return "cudaErrorSyncDepthExceeded"; + + case cudaErrorLaunchPendingCountExceeded: + return "cudaErrorLaunchPendingCountExceeded"; + + case cudaErrorNotPermitted: + return "cudaErrorNotPermitted"; + + case cudaErrorNotSupported: + return "cudaErrorNotSupported"; + + /* Since CUDA 6.0 */ + case cudaErrorHardwareStackError: + return "cudaErrorHardwareStackError"; + + case cudaErrorIllegalInstruction: + return "cudaErrorIllegalInstruction"; + + case cudaErrorMisalignedAddress: + return "cudaErrorMisalignedAddress"; + + case cudaErrorInvalidAddressSpace: + return "cudaErrorInvalidAddressSpace"; + + case cudaErrorInvalidPc: + return "cudaErrorInvalidPc"; + + case cudaErrorIllegalAddress: + return "cudaErrorIllegalAddress"; + + /* Since CUDA 6.5*/ + case cudaErrorInvalidPtx: + return "cudaErrorInvalidPtx"; + + case cudaErrorInvalidGraphicsContext: + return "cudaErrorInvalidGraphicsContext"; + + case cudaErrorStartupFailure: + return "cudaErrorStartupFailure"; + + case cudaErrorApiFailureBase: + return "cudaErrorApiFailureBase"; + } + + return ""; +} +#endif + +template< typename T > +void check(T result, char const *const func, const char *const file, int const line) +{ + if (result) + { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", + file, line, static_cast(result), _cudaGetErrorEnum(result), func); + cudaDeviceReset(); + // Make sure we call CUDA Device Reset before exiting + exit(EXIT_FAILURE); + } +} + +int 
*pArgc = NULL; +char **pArgv = NULL; + +#if CUDART_VERSION < 5000 + +// CUDA-C includes +#include + +// This function wraps the CUDA Driver API into a template function +template +inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) +{ + CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device); + + if (CUDA_SUCCESS != error) { + fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n", + error, __FILE__, __LINE__); + + // cudaDeviceReset causes the driver to clean up all state. While + // not mandatory in normal operation, it is good practice. It is also + // needed to ensure correct operation when the application is being + // profiled. Calling cudaDeviceReset causes all profile data to be + // flushed before the application exits + cudaDeviceReset(); + exit(EXIT_FAILURE); + } +} + +#endif /* CUDART_VERSION < 5000 */ + +// Beginning of GPU Architecture definitions +inline int ConvertSMVer2Cores(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM + typedef struct { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = { + { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class + { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class + { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class + { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class + { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class + { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class + { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class + { -1, -1 } + }; + + int index = 0; + + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { + return nGpuArchCoresPerSM[index].Cores; + } + + index++; + } + + // If we don't find the values, we default use the previous one to run properly + printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); + return nGpuArchCoresPerSM[index-1].Cores; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main(int argc, char **argv) +{ + pArgc = &argc; + pArgv = argv; + + printf("%s Starting...\n\n", argv[0]); + printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n"); + + int deviceCount = 0; + cudaError_t error_id = cudaGetDeviceCount(&deviceCount); + + if (error_id != cudaSuccess) { + printf("cudaGetDeviceCount failed: %s (%d)\n", + cudaGetErrorString(error_id), (int) error_id); + printf("Result = FAIL\n"); + exit(EXIT_FAILURE); + } + + // This function call returns 0 if there are no CUDA capable devices. 
+ if (deviceCount == 0) + printf("There are no available device(s) that support CUDA\n"); + else + printf("Detected %d CUDA Capable device(s)\n", deviceCount); + + int dev, driverVersion = 0, runtimeVersion = 0; + + for (dev = 0; dev < deviceCount; ++dev) { + cudaSetDevice(dev); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, dev); + + printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); + + // Console log + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10); + printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor); + + printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", + (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem); + + printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n", + deviceProp.multiProcessorCount, + ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), + ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); + printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); + + +#if CUDART_VERSION >= 5000 + // This is supported in CUDA 5.0 (runtime API device properties) + printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); + printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); + + if (deviceProp.l2CacheSize) { + printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize); + } + +#else + // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API) + int memoryClock; + getCudaAttribute(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev); + printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f); + int memBusWidth; + getCudaAttribute(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev); + printf(" Memory Bus Width: %d-bit\n", memBusWidth); + int L2CacheSize; + getCudaAttribute(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev); + + if (L2CacheSize) { + printf(" L2 Cache Size: %d bytes\n", L2CacheSize); + } + +#endif + + printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n", + deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1], + deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); + printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", + deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); + printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n", + deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]); + + + printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem); + printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock); + printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock); + printf(" Warp size: %d\n", deviceProp.warpSize); + printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor); + printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock); + printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", + deviceProp.maxThreadsDim[0], + deviceProp.maxThreadsDim[1], + 
deviceProp.maxThreadsDim[2]); + printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", + deviceProp.maxGridSize[0], + deviceProp.maxGridSize[1], + deviceProp.maxGridSize[2]); + printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch); + printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment); + printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); + printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); + printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); + printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); + printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled"); +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)"); +#endif + printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No"); + printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); + + const char *sComputeMode[] = { + "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", + "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", + "Prohibited (no host thread can use ::cudaSetDevice() with this device)", + "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", + "Unknown", + NULL + }; + printf(" Compute Mode:\n"); + printf(" < %s >\n", sComputeMode[deviceProp.computeMode]); + } + + // If there are 2 or more GPUs, query to determine whether RDMA is supported + if (deviceCount >= 2) + { + cudaDeviceProp prop[64]; + int gpuid[64]; // we want to find the first two GPU's that can support P2P + int gpu_p2p_count = 0; + + for (int i=0; i < deviceCount; i++) + { + checkCudaErrors(cudaGetDeviceProperties(&prop[i], i)); + + // Only boards based on Fermi or later can support P2P + if ((prop[i].major >= 2) +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this + && prop[i].tccDriver +#endif + ) + { + // This is an array of P2P capable GPUs + gpuid[gpu_p2p_count++] = i; + } + } + + // Show all the combinations of support P2P GPUs + int can_access_peer_0_1, can_access_peer_1_0; + + if (gpu_p2p_count >= 2) + { + for (int i = 0; i < gpu_p2p_count-1; i++) + { + for (int j = 1; j < gpu_p2p_count; j++) + { + checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j])); + printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i], + prop[gpuid[j]].name, gpuid[j] , + can_access_peer_0_1 ? "Yes" : "No"); + } + } + + for (int j = 1; j < gpu_p2p_count; j++) + { + for (int i = 0; i < gpu_p2p_count-1; i++) + { + checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i])); + printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j], + prop[gpuid[i]].name, gpuid[i] , + can_access_peer_1_0 ? 
"Yes" : "No"); + } + } + } + } + + // csv masterlog info + // ***************************** + // exe and CUDA driver name + printf("\n"); + std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; + char cTemp[128]; + + // driver version + sProfileString += ", CUDA Driver Version = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10); +#else + sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10); +#endif + sProfileString += cTemp; + + // Runtime version + sProfileString += ", CUDA Runtime Version = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); +#else + sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); +#endif + sProfileString += cTemp; + + // Device count + sProfileString += ", NumDevs = "; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 10, "%d", deviceCount); +#else + sprintf(cTemp, "%d", deviceCount); +#endif + sProfileString += cTemp; + + // Print Out all device Names + for (dev = 0; dev < deviceCount; ++dev) + { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + sprintf_s(cTemp, 13, ", Device%d = ", dev); +#else + sprintf(cTemp, ", Device%d = ", dev); +#endif + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, dev); + sProfileString += cTemp; + sProfileString += deviceProp.name; + } + + sProfileString += "\n"; + printf("%s", sProfileString.c_str()); + + printf("Result = PASS\n"); + + // finish + // cudaDeviceReset causes the driver to clean up all state. While + // not mandatory in normal operation, it is good practice. It is also + // needed to ensure correct operation when the application is being + // profiled. Calling cudaDeviceReset causes all profile data to be + // flushed before the application exits + cudaDeviceReset(); + return 0; +} diff --git a/pkgs/cudainfo/default.nix b/pkgs/cudainfo/default.nix new file mode 100644 index 0000000..871d697 --- /dev/null +++ b/pkgs/cudainfo/default.nix @@ -0,0 +1,43 @@ +{ + stdenv +, cudatoolkit +, cudaPackages +, autoAddDriverRunpath +, strace +}: + +stdenv.mkDerivation (finalAttrs: { + name = "cudainfo"; + src = ./.; + buildInputs = [ + cudatoolkit # Required for nvcc + cudaPackages.cuda_cudart.static # Required for -lcudart_static + autoAddDriverRunpath + ]; + installPhase = '' + mkdir -p $out/bin + cp -a cudainfo $out/bin + ''; + passthru.gpuCheck = stdenv.mkDerivation { + name = "cudainfo-test"; + requiredSystemFeatures = [ "cuda" ]; + dontBuild = true; + nativeCheckInputs = [ + finalAttrs.finalPackage # The cudainfo package from above + strace # When it fails, it will show the trace + ]; + dontUnpack = true; + doCheck = true; + checkPhase = '' + if ! 
cudainfo; then
+        set -x
+        cudainfo=$(command -v cudainfo)
+        ldd $cudainfo
+        readelf -d $cudainfo
+        strace -f $cudainfo
+        set +x
+      fi
+    '';
+    installPhase = "touch $out";
+  };
+})
diff --git a/pkgs/meteocat-exporter/default.nix b/pkgs/meteocat-exporter/default.nix
new file mode 100644
index 0000000..5bc4f09
--- /dev/null
+++ b/pkgs/meteocat-exporter/default.nix
@@ -0,0 +1,25 @@
+{ python3Packages, lib }:
+
+python3Packages.buildPythonApplication rec {
+  pname = "meteocat-exporter";
+  version = "1.0";
+
+  src = ./.;
+
+  doCheck = false;
+
+  build-system = with python3Packages; [
+    setuptools
+  ];
+
+  dependencies = with python3Packages; [
+    beautifulsoup4
+    lxml
+    prometheus-client
+  ];
+
+  meta = with lib; {
+    description = "MeteoCat Prometheus Exporter";
+    platforms = platforms.linux;
+  };
+}
diff --git a/pkgs/meteocat-exporter/meteocat-exporter b/pkgs/meteocat-exporter/meteocat-exporter
new file mode 100644
index 0000000..acc9f3e
--- /dev/null
+++ b/pkgs/meteocat-exporter/meteocat-exporter
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+import time
+from prometheus_client import start_http_server, Gauge
+from bs4 import BeautifulSoup
+from urllib import request
+
+# Configuration -------------------------------------------
+meteo_station = "X8" # Barcelona - Zona Universitària
+listening_port = 9929
+update_period = 60 * 5 # Every 5 min
+# ---------------------------------------------------------
+
+metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
+metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
+metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
+metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
+
+def update(st):
+    url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
+    response = request.urlopen(url)
+    data = response.read()
+    soup = BeautifulSoup(data, 'lxml')
+    table = soup.find("table", {"class" : "tblperiode"})
+    rows = table.find_all('tr')
+    row = rows[-1] # Take the last row
+    row_data = []
+    header = row.find('th')
+    header_text = header.text.strip()
+    row_data.append(header_text)
+    for col in row.find_all('td'):
+        row_data.append(col.text)
+    try:
+        # Sometimes it will return '(s/d)' and fail to parse
+        metric_tavg.set(float(row_data[1]))
+        metric_tmax.set(float(row_data[2]))
+        metric_tmin.set(float(row_data[3]))
+        metric_srad.set(float(row_data[10]))
+        #print("ok: temp_avg={}".format(float(row_data[1])))
+    except Exception:
+        print("cannot parse row: {}".format(row))
+        metric_tavg.set(float("nan"))
+        metric_tmax.set(float("nan"))
+        metric_tmin.set(float("nan"))
+        metric_srad.set(float("nan"))
+
+if __name__ == '__main__':
+    start_http_server(port=listening_port, addr="localhost")
+    while True:
+        try:
+            update(meteo_station)
+        except Exception:
+            print("update failed")
+        time.sleep(update_period)
diff --git a/pkgs/meteocat-exporter/setup.py b/pkgs/meteocat-exporter/setup.py
new file mode 100644
index 0000000..9cc74d3
--- /dev/null
+++ b/pkgs/meteocat-exporter/setup.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='meteocat-exporter',
+      version='1.0',
+      # Modules to import from other scripts:
+      packages=find_packages(),
+      # Executables
+      scripts=["meteocat-exporter"],
+      )
diff --git a/pkgs/mpich/default.nix b/pkgs/mpich/default.nix
index 36bee85..4b5307a 100644
--- a/pkgs/mpich/default.nix
+++ b/pkgs/mpich/default.nix
@@ -1,68 +1,36 @@
 { stdenv
 , lib
-, fetchurl
-, perl
-, gfortran
-, openssh
-, hwloc
 , libfabric
-, enableDebug ? false
+, mpich
+, pmix
+, gfortran
+, symlinkJoin
 }:
 
-with lib;
-
-stdenv.mkDerivation rec {
-  pname = "mpich";
-  version = "3.3.2";
-
-  src = fetchurl {
-    url = "https://www.mpich.org/static/downloads/${version}/mpich-${version}.tar.gz";
-    sha256 = "1farz5zfx4cd0c3a0wb9pgfypzw0xxql1j1294z1sxslga1ziyjb";
+let
+  # pmix comes with the libraries in .out and headers in .dev
+  pmixAll = symlinkJoin {
+    name = "pmix-all";
+    paths = [ pmix.dev pmix.out ];
   };
-
+in mpich.overrideAttrs (old: {
+  buildInputs = old.buildInputs ++ [
+    libfabric
+    pmixAll
+  ];
   configureFlags = [
     "--enable-shared"
     "--enable-sharedlib"
+    "--with-pm=no"
     "--with-device=ch4:ofi"
+    "--with-pmi=pmix"
+    "--with-pmix=${pmixAll}"
     "--with-libfabric=${libfabric}"
-  ]
-  ++ optional enableDebug "--enable-g=dbg,log";
-
-  enableParallelBuilding = true;
-
-  buildInputs = [ perl gfortran openssh hwloc libfabric ];
+    "--enable-g=log"
+  ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
+    "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
+    "FCFLAGS=-fallow-argument-mismatch"
+  ];
 
   hardeningDisable = [ "all" ];
-
-  # doCheck = true; # Fails
-
-  preFixup = ''
-    # Ensure the default compilers are the ones mpich was built with
-    sed -i 's:CC="gcc":CC=${stdenv.cc}/bin/gcc:' $out/bin/mpicc
-    sed -i 's:CXX="g++":CXX=${stdenv.cc}/bin/g++:' $out/bin/mpicxx
-    sed -i 's:FC="gfortran":FC=${gfortran}/bin/gfortran:' $out/bin/mpifort
-  ''
-  + lib.optionalString (!stdenv.isDarwin) ''
-    # /tmp/nix-build... ends up in the RPATH, fix it manually
-    for entry in $out/bin/mpichversion $out/bin/mpivars; do
-      echo "fix rpath: $entry"
-      patchelf --set-rpath "$out/lib" $entry
-    done
-  '';
-
-  meta = with lib; {
-    description = "Implementation of the Message Passing Interface (MPI) standard";
-
-    longDescription = ''
-      MPICH is a high-performance and widely portable implementation of
-      the Message Passing Interface (MPI) standard (MPI-1, MPI-2 and MPI-3).
-    '';
-    homepage = "http://www.mpich.org";
-    license = {
-      url = "https://github.com/pmodels/mpich/blob/v${version}/COPYRIGHT";
-      fullName = "MPICH license (permissive)";
-    };
-    maintainers = [ ];
-    platforms = platforms.linux ++ platforms.darwin;
-  };
-}
+})
diff --git a/pkgs/slurm-exporter/default.nix b/pkgs/slurm-exporter/default.nix
new file mode 100644
index 0000000..9cfc972
--- /dev/null
+++ b/pkgs/slurm-exporter/default.nix
@@ -0,0 +1,22 @@
+{ buildGoModule, fetchFromGitHub, lib }:
+
+buildGoModule rec {
+  pname = "prometheus-slurm-exporter";
+  version = "0.20";
+
+  src = fetchFromGitHub {
+    rev = version;
+    owner = "vpenso";
+    repo = pname;
+    sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs=";
+  };
+
+  vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI=";
+  doCheck = false;
+
+  meta = with lib; {
+    description = "Prometheus SLURM Exporter";
+    homepage = "https://github.com/vpenso/prometheus-slurm-exporter";
+    platforms = platforms.linux;
+  };
+}
diff --git a/pkgs/slurm/default.nix b/pkgs/slurm/default.nix
index fd7a43c..355ff4c 100644
--- a/pkgs/slurm/default.nix
+++ b/pkgs/slurm/default.nix
@@ -1,80 +1,22 @@
-{ stdenv, lib, fetchFromGitHub, pkg-config, libtool, curl
-, python, munge, perl, pam, openssl
-, ncurses, libmysqlclient, gtk2, lua, hwloc, numactl
-, readline, freeipmi, libssh2, xorg
-, pmix
-# enable internal X11 support via libssh2
-, enableX11 ? true
-}:
+{ slurm }:
 
-stdenv.mkDerivation rec {
-  name = "slurm-${version}";
-  version = "17.11.9-2";
-
-  # N.B. We use github release tags instead of https://www.schedmd.com/downloads.php
-  # because the latter does not keep older releases.
-  src = fetchFromGitHub {
-    owner = "SchedMD";
-    repo = "slurm";
-    # The release tags use - instead of .
-    rev = "${builtins.replaceStrings ["."] ["-"] name}";
-    sha256 = "1lq4ac6yjai6wh979dciw8v3d99zbd3w36rfh0vpncqm672fg1qy";
-  };
-
-  outputs = [ "out" "dev" ];
-
-  prePatch = lib.optional enableX11 ''
-    substituteInPlace src/common/x11_util.c \
-      --replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"'
+slurm.overrideAttrs (old: {
+  patches = (old.patches or []) ++ [
+    # See https://bugs.schedmd.com/show_bug.cgi?id=19324
+    # Still unmerged as of 2025-10-03.
+    ./slurm-rank-expansion.patch
+  ];
+  # Also install the pam_slurm_adopt module to keep users from accessing
+  # nodes where they have no job allocated.
+  # TODO: Review pam_slurm_adopt, I don't trust its code much.
+  postBuild = (old.postBuild or "") + ''
+    pushd contribs/pam_slurm_adopt
+    make "PAM_DIR=$out/lib/security"
+    popd
   '';
-
-  # nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
-  # https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
-  # this doesn't fix tests completely at least makes slurmd to launch
-  hardeningDisable = [ "bindnow" ];
-
-  nativeBuildInputs = [ pkg-config libtool ];
-  buildInputs = [
-    curl python munge perl pam openssl
-    libmysqlclient ncurses gtk2
-    lua hwloc numactl readline freeipmi
-    pmix
-  ] ++ lib.optionals enableX11 [ libssh2 xorg.xauth ];
-
-  configureFlags = with lib;
-    [ "--with-munge=${munge}"
-      "--with-ssl=${openssl.dev}"
-      "--with-hwloc=${hwloc.dev}"
-      "--with-freeipmi=${freeipmi}"
-      "--sysconfdir=/etc/slurm"
-      "--with-pmix=${pmix}"
-    ] ++ (optional (gtk2 == null) "--disable-gtktest")
-    ++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
-
-
-  preConfigure = ''
-    patchShebangs ./doc/html/shtml2html.py
-    patchShebangs ./doc/man/man2html.py
-    patchShebangs ./configure
+  postInstall = (old.postInstall or "") + ''
+    pushd contribs/pam_slurm_adopt
+    make "PAM_DIR=$out/lib/security" install
+    popd
   '';
-
-# postBuild = ''
-#   pushd contrib/pmi2
-#   make -j install
-#   popd
-# '';
-
-  postInstall = ''
-    rm -f $out/lib/*.la $out/lib/slurm/*.la
-  '';
-
-  enableParallelBuilding = true;
-
-  meta = with lib; {
-    homepage = http://www.schedmd.com/;
-    description = "Simple Linux Utility for Resource Management";
-    platforms = platforms.linux;
-    license = licenses.gpl2;
-    maintainers = with maintainers; [ jagajaga markuskowa ];
-  };
-}
+})
diff --git a/pkgs/slurm/slurm-rank-expansion.patch b/pkgs/slurm/slurm-rank-expansion.patch
new file mode 100644
index 0000000..4666d76
--- /dev/null
+++ b/pkgs/slurm/slurm-rank-expansion.patch
@@ -0,0 +1,11 @@
+--- a/src/plugins/mpi/pmix/pmixp_dmdx.c	2024-03-15 13:05:24.815313882 +0100
++++ b/src/plugins/mpi/pmix/pmixp_dmdx.c	2024-03-15 13:09:53.936900823 +0100
+@@ -314,7 +314,7 @@ static void _dmdx_req(buf_t *buf, int no
+ 	}
+ 
+ 	nsptr = pmixp_nspaces_local();
+-	if (nsptr->ntasks <= rank) {
++	if ((long) nsptr->ntasks <= (long) rank) {
+ 		char *nodename = pmixp_info_job_host(nodeid);
+ 		PMIXP_ERROR("Bad request from %s: nspace \"%s\" has only %d ranks, asked for %d",
+ 			    nodename, ns, nsptr->ntasks, rank);
diff --git a/pkgs/upc-qaire-exporter/default.nix b/pkgs/upc-qaire-exporter/default.nix
new file mode 100644
index 0000000..b5c14cb
--- /dev/null
+++ b/pkgs/upc-qaire-exporter/default.nix
@@ -0,0 +1,24 @@
+{ python3Packages, lib }:
+
+python3Packages.buildPythonApplication rec {
+  pname = "upc-qaire-exporter";
+  version = "1.0";
+
+  src = ./.;
+
+  doCheck = false;
+
+  build-system = with python3Packages; [
+    setuptools
+  ];
+
+  dependencies = with python3Packages; [
+    prometheus-client
+    requests
+  ];
+
+  meta = with lib; {
+    description = "UPC Qaire Prometheus Exporter";
+    platforms = platforms.linux;
+  };
+}
diff --git a/pkgs/upc-qaire-exporter/setup.py b/pkgs/upc-qaire-exporter/setup.py
new file mode 100644
index 0000000..e2238a7
--- /dev/null
+++ b/pkgs/upc-qaire-exporter/setup.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='upc-qaire-exporter',
+      version='1.0',
+      # Modules to import from other scripts:
+      packages=find_packages(),
+      # Executables
+      scripts=["upc-qaire-exporter"],
+      )
diff --git a/pkgs/upc-qaire-exporter/upc-qaire-exporter b/pkgs/upc-qaire-exporter/upc-qaire-exporter
new file mode 100644
index 0000000..39697aa
--- /dev/null
+++ b/pkgs/upc-qaire-exporter/upc-qaire-exporter
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+import time
+from prometheus_client import start_http_server, Gauge
+import requests, json
+from datetime import datetime, timedelta
+
+# Configuration -------------------------------------------
+listening_port = 9928
+update_period = 60 * 5 # Every 5 min
+# ---------------------------------------------------------
+
+metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
+
+def genparams():
+    d = {}
+    d['topic'] = 'TEMPERATURE'
+    d['shift_dates_to'] = ''
+    d['datapoints'] = 301
+    d['devicesAndColors'] = '1148418@@@#40ACB6'
+
+    now = datetime.now()
+
+    d['fromDate'] = now.strftime('%d/%m/%Y')
+    d['toDate'] = now.strftime('%d/%m/%Y')
+    d['serviceFrequency'] = 'NONE'
+
+    # The form expects a value for every half-hour slot of the week, so mark
+    # them all as OPEN.
+    for i in range(7):
+        for j in range(48):
+            key = 'week.days[{}].hours[{}].value'.format(i, j)
+            d[key] = 'OPEN'
+
+    return d
+
+def measure():
+    # First we need to load session
+    s = requests.Session()
+    r = s.get("https://upc.edu/sirena")
+    if r.status_code != 200:
+        print("bad HTTP status code on new session: {}".format(r.status_code))
+        return
+
+    if s.cookies.get("JSESSIONID") is None:
+        print("cannot get JSESSIONID")
+        return
+
+    # Now we can pull the data
+    url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
+    r = s.post(url, data=genparams())
+
+    if r.status_code != 200:
+        print("bad HTTP status code on data: {}".format(r.status_code))
+        return
+
+    #print(r.text)
+    j = json.loads(r.content)
+
+    # Just take the last one
+    last = j['data']['chartElementList'][-1]
+    temp = last['values']['1148418-Temperatura']
+
+    return temp
+
+if __name__ == '__main__':
+    start_http_server(port=listening_port, addr="localhost")
+    while True:
+        try:
+            metric_temp.set(measure())
+        except Exception:
+            print("measure failed")
+            metric_temp.set(float("nan"))
+
+        time.sleep(update_period)
diff --git a/rebuild.sh b/rebuild.sh
new file mode 100755
index 0000000..1320c24
--- /dev/null
+++ b/rebuild.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -ex
+
+if [ "$(id -u)" != 0 ]; then
+	echo "Needs root permissions"
+	exit 1
+fi
+
+if [ "$(hostname)" != "hut" ]; then
+	>&2 echo "must run from machine hut, not $(hostname)"
+	exit 1
+fi
+
+# Update all nodes
+nixos-rebuild switch --flake .
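+# The remaining nodes are built locally and then activated over SSH on each --target-host.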
+nixos-rebuild switch --flake .#owl1 --target-host owl1 +nixos-rebuild switch --flake .#owl2 --target-host owl2 diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age new file mode 100644 index 0000000..48b912c --- /dev/null +++ b/secrets/ceph-user.age @@ -0,0 +1,25 @@ +age-encryption.org/v1 +-> ssh-ed25519 AY8zKw /gmhFOFqOs8IobAImvQVKeM5Y6k0FpuR61/Cu5drVVI +g9FXJg2oIoien0zJ70FWHwSTM8SBwbpS188S3Swj7EM +-> ssh-ed25519 sgAamA opPjlWPhSiI0Rd5l7kd204S5FXFLcQcQftyKb7MDmnU +3XrRDVnglCP+vBwvfd1rP5gHttsGDHyXwbf10a8/kKY +-> ssh-ed25519 HY2yRg QKZbubM76C3tobPoyCFDRclA9Pzb2fC7s4WOoIgdORc +K5kckU0KhQFTE6SikJXFJgM41Tco5+VqOsaG0qLrY1Q +-> ssh-ed25519 fw2Xhg +ohqts8dLFjvdHxrGHcOGxU0dm+V3N//giljHkobpDM +jR/UzGrfS9lrJ/VeolKLxfzeJAf2fIB2pdIn/6ukqNk +-> ssh-ed25519 tcumPQ 3DPkDPIQQSVtXSLzIRETsIyXQ0k1o18Evn6vf+l/6R8 +bLXF62OmJjnOT1vvgq3+AcOKKSG5NonrK5EqCVc0Mwo +-> ssh-ed25519 JJ1LWg 2Wefc7eLolMU5InEmCNTq21Mf71mI0a2N1HgDrlHvy4 +qXFW9CQBnrzubZ0mzS0Io2WGRrwGBkmeYndBTcZn/fM +-> ssh-ed25519 cDBabA oiH36AoIt/fFFYgnoxtH7OoetP+2/wjtn8qo3RJDSHc +qKmkxy1aZGP4ZwC0iH7n7hiJ0+rFQYvjQb5O1a1Z0r4 +-> ssh-ed25519 cK5kHw bX3RtO5StMejUYWAaA37fjHA5nO7Xs1vWDQk3yOjs2o +Egxmcf8FKAd+E5hMLmhV1yQsCo5rJyUazf1szOvpTAM +-> ssh-ed25519 CAWG4Q oKqqRDJH0w8lsoQBQk0w8PO+z5gFNmSaGBUSumvDp1I +m1zWp9MfViAmtpbJhqOHraIokDaPKb0DvvO4vAGCTWI +-> ssh-ed25519 xA739A G26kPOz6sbFATs+KAr7gbDvji13eA1smFusQAOJXMwA +Sppvz7A103kZoNxoGsd6eXeCvVh7mBE2MRwLFj9O1dY +-> ssh-ed25519 MSF3dg 55ekNcp+inbUd+GQ/VZ7BoBASaJ8YDqF74CVXy1PUxQ +aTHLLAbzQPWWld/OT3BKebc6FcmsqMTaWCPBGm1UHic +--- mVkAMnI9XQhS3fMiFuuXP/yLR9wEG9+Rr8pA4Uc0avY +DU sjM$[M[_K7sjuvD4g܄3Gn ɽP7~rZs \ No newline at end of file diff --git a/secrets/gitea-runner-token.age b/secrets/gitea-runner-token.age new file mode 100644 index 0000000..a5c23d9 --- /dev/null +++ b/secrets/gitea-runner-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik +DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI +-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio +hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU +-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw +hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs +-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY +eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM +-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg +/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg +--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs +YM:E O2r=&4CQΣhCcb^Sy% x-vC`gW^wVG \ No newline at end of file diff --git a/secrets/gitlab-bsc-docker-token.age b/secrets/gitlab-bsc-docker-token.age new file mode 100644 index 0000000..2b77fcf Binary files /dev/null and b/secrets/gitlab-bsc-docker-token.age differ diff --git a/secrets/gitlab-runner-docker-token.age b/secrets/gitlab-runner-docker-token.age new file mode 100644 index 0000000..e7f58c7 --- /dev/null +++ b/secrets/gitlab-runner-docker-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk +6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM +-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY +6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M +-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc +mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY +-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M +KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc +-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc +wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U +--- 
1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0 +R@/iI'Nxr"`Oy8 \/ID`ߓuy:9Lt؋AU`;q8GLU#iyiڜ \ No newline at end of file diff --git a/secrets/gitlab-runner-shell-token.age b/secrets/gitlab-runner-shell-token.age new file mode 100644 index 0000000..0290f9a --- /dev/null +++ b/secrets/gitlab-runner-shell-token.age @@ -0,0 +1,15 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg NYGOSeZn8nGJUqpWoOAA9XO8P7eckUBKXCs8wPs+wlU +oLlgaZJVLV9Im1h0vHEKPVApsh46av8ovMgNoFDKle4 +-> ssh-ed25519 cK5kHw bC9UlQXeP5LwIFFO9oHXocqojLtSPWE/kWbhCbiSIGg +wPGpwKpCcV09jvxVmwj6BTmjm+CZv42sdgCqSfD624Y +-> ssh-ed25519 CAWG4Q WkJgjedCBn4i4b/VFuU9Wq21VkHxiuwsla+0PuiSiD4 +/nnfy2DTxQkkfCqzIa+lxgqn6MIgFlN5gZHYYApePvs +-> ssh-ed25519 xA739A 9gcn6j7c7rCR50AetiuCkAnMsSEMtQto///qlTkAWhs +lXrOn+cehZpRkIRzSJ1e64KsCqWf3tKa4ABbYBquvqM +-> ssh-ed25519 MSF3dg FzrEytuzBKr+HwpC1bxev3q+6cSZoMMCJdJfuANlHwo +qVyt4YpzGfvNX6IqwXs6oRA5aSgidFFxEA22D8XPJBU +--- FHVIG8tcNJBte+3VUsR3FsOs8xqrAeboFLxOV/xvSz0 +_BMjH~ ^^x' +X |1d:`r XABZ|^E{]UϾEЗ83VD^;: + \ No newline at end of file diff --git a/secrets/ipmi.yml.age b/secrets/ipmi.yml.age new file mode 100644 index 0000000..c02079f Binary files /dev/null and b/secrets/ipmi.yml.age differ diff --git a/secrets/jungle-robot-password.age b/secrets/jungle-robot-password.age new file mode 100644 index 0000000..1a296c6 Binary files /dev/null and b/secrets/jungle-robot-password.age differ diff --git a/secrets/munge-key.age b/secrets/munge-key.age new file mode 100644 index 0000000..a92ac0d Binary files /dev/null and b/secrets/munge-key.age differ diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age new file mode 100644 index 0000000..dcc0b5e Binary files /dev/null and b/secrets/nix-serve.age differ diff --git a/secrets/secrets.nix b/secrets/secrets.nix new file mode 100644 index 0000000..920d52d --- /dev/null +++ b/secrets/secrets.nix @@ -0,0 +1,34 @@ +let + keys = import ../keys.nix; + adminsKeys = builtins.attrValues keys.admins; + hut = [ keys.hosts.hut ] ++ adminsKeys; + fox = [ keys.hosts.fox ] ++ adminsKeys; + apex = [ keys.hosts.apex ] ++ adminsKeys; + raccoon = [ keys.hosts.raccoon ] ++ adminsKeys; + mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys; + tent = [ keys.hosts.tent ] ++ adminsKeys; + # Only expose ceph keys to safe nodes and admins + safe = keys.hostGroup.safe ++ adminsKeys; +in +{ + "gitea-runner-token.age".publicKeys = hut; + "gitlab-runner-docker-token.age".publicKeys = hut; + "gitlab-runner-shell-token.age".publicKeys = hut; + "gitlab-bsc-docker-token.age".publicKeys = hut; + "nix-serve.age".publicKeys = mon; + "jungle-robot-password.age".publicKeys = mon; + "ipmi.yml.age".publicKeys = mon; + + "tent-gitlab-runner-pm-docker-token.age".publicKeys = tent; + "tent-gitlab-runner-pm-shell-token.age".publicKeys = tent; + "tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent; + "vpn-dac-login.age".publicKeys = tent; + "vpn-dac-client-key.age".publicKeys = tent; + + "ceph-user.age".publicKeys = safe; + "munge-key.age".publicKeys = safe; + + "wg-fox.age".publicKeys = fox; + "wg-apex.age".publicKeys = apex; + "wg-raccoon.age".publicKeys = raccoon; +} diff --git a/secrets/tent-gitlab-runner-bsc-docker-token.age b/secrets/tent-gitlab-runner-bsc-docker-token.age new file mode 100644 index 0000000..b8fe92d --- /dev/null +++ b/secrets/tent-gitlab-runner-bsc-docker-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0 +CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ +-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4 
+JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4 +-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg +IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc +-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg +fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng +-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0 +pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE +--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8 +\ֺ"^DTH3_|.h^ngS]_?nz~2!p7<ʨD?~F$`q+SW(+Pcu[m`OܛϖT \ No newline at end of file diff --git a/secrets/tent-gitlab-runner-pm-docker-token.age b/secrets/tent-gitlab-runner-pm-docker-token.age new file mode 100644 index 0000000..863144d --- /dev/null +++ b/secrets/tent-gitlab-runner-pm-docker-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM +vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE +-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI +/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng +-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE +JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho +-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA +msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ +-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec +SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE +--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o +f#,|Ey.vDLӺJPX`-#FUbs(Q!?#xJG?5~6MA UCM$+}WNϨG!a%ǽG \ No newline at end of file diff --git a/secrets/tent-gitlab-runner-pm-shell-token.age b/secrets/tent-gitlab-runner-pm-shell-token.age new file mode 100644 index 0000000..74527b0 --- /dev/null +++ b/secrets/tent-gitlab-runner-pm-shell-token.age @@ -0,0 +1,13 @@ +age-encryption.org/v1 +-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY +RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4 +-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE +Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM +-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE +FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU +-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU +sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0 +-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc +8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA +--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ +-b#M.@tŵ}+ό#@ky?vnT+[Q gA "qh]WVoxD](S%IU_f2d[֐pS` \ No newline at end of file diff --git a/secrets/vpn-dac-client-key.age b/secrets/vpn-dac-client-key.age new file mode 100644 index 0000000..c414fd7 Binary files /dev/null and b/secrets/vpn-dac-client-key.age differ diff --git a/secrets/vpn-dac-login.age b/secrets/vpn-dac-login.age new file mode 100644 index 0000000..6191ec7 --- /dev/null +++ b/secrets/vpn-dac-login.age @@ -0,0 +1,14 @@ +age-encryption.org/v1 +-> ssh-ed25519 G5LX5w SRJhNenoQXbT1FgX3TMPnVH5P6oe2eHot+M1YsEjsEk +hfTSLgKi98Eh7JK5o7x2POpTEtQlQCpEa3keUFYCuME +-> ssh-ed25519 cK5kHw z5TwWJTkvx7HztjXHJW/aCOtOfPrQaLP0gyIT7rXcyU +b4NCpHfasgvkLLr+6LcWUl60p59aSNnfp3bl2OFYXo0 +-> ssh-ed25519 CAWG4Q 4VpS1/OnFe8nxcQbRTKNhjsh/ZQ5cbhSMXwK/jjQ+3o +WF9wvOkqVml4UcEzyzeumKuUwCwwr2zvKLMg+PCB8nk +-> ssh-ed25519 xA739A 67FhuJ070jBVMt/xbKHWhfri6iIm0FyaFvzQabsvFBM +1G5/913dDv/r/6p1x/c5YiUnZzrX/LvIj33KW+PN0KU +-> ssh-ed25519 MSF3dg Bj/yB4N2wkyHCHC22tcjjJAA4ebSamN0Z4UVX3ZnryI +6D/ZgTs+j+MGDAbPU5zyK0i9zN6tQy68IcOnQZ27mYg +--- 169erk3ICSYLs4FPEuXCn7QlekWhsmSn0Lr+/R14I5Q +ҽ3s +w4Db."|)";.ɫ7)LeC=S؟ \ No newline at end of file diff --git a/secrets/wg-apex.age b/secrets/wg-apex.age new file mode 100644 index 0000000..c22c167 
Binary files /dev/null and b/secrets/wg-apex.age differ diff --git a/secrets/wg-fox.age b/secrets/wg-fox.age new file mode 100644 index 0000000..57079f3 --- /dev/null +++ b/secrets/wg-fox.age @@ -0,0 +1,14 @@ +age-encryption.org/v1 +-> ssh-ed25519 cDBabA heyW9/cxgwFX9IexQIXjAQDWGQPNcMXcArQp2Rxsqx4 +o9MQ7EH8PDDjsJdpH9F3Xq2zUoaDAJQlfFmYucSFs6Y +-> ssh-ed25519 cK5kHw Sza4pos7K3qW3omEeyidI/jszJNf9smemSZnUJfCIww +D6vazXki7hIYraIuSiGPS+FPbkFUwHhHWDf52OhEIMg +-> ssh-ed25519 CAWG4Q YexIHueOIMmIN8JIDyNUOKBkyz/k18HqV3hTXh48KlM +xh8UJzzWT6ByN+Dpn4JrMNsjGC/uc/v6LynwjBDz9NQ +-> ssh-ed25519 xA739A KySG3TXdqfCMUkVEDGa74B0op745s3XGYxFLyAXSQAc +5EI/yb5ctW9Qu18bHm3/sK97kwGcKzzmWvPSCWm89XA +-> ssh-ed25519 MSF3dg MNxnNj0fHmri8ophexXPNjRUBUWrzcuk5S1mucxUMTE +GVFWXtISEU8ZmlwL4nh4weAgfGrt2GHX0DTzbpS6zg8 +--- UdrqkYG2ZApAuwdZeNhC50NP2rkD/Ol6y8nJa4RHx7Y +ܻm(>HY87G+*9V.pOo=+哇P0{)>z3P^ +u \ No newline at end of file diff --git a/secrets/wg-raccoon.age b/secrets/wg-raccoon.age new file mode 100644 index 0000000..f32a2aa Binary files /dev/null and b/secrets/wg-raccoon.age differ