17 Commits

Author SHA1 Message Date
f9970c0ac7 Add documentation section about GRUB chain loading 2024-07-22 12:12:26 +02:00
4a52970821 Add 10 min shutdown jitter to avoid spikes
The shutdown timer will fire at slightly different times for the
different nodes, so we slowly decrease the power consumption.
2024-07-22 11:20:02 +02:00
7b58d8fbcc Don't mount the nix store in owl nodes
Initially we planned to run jobs in those nodes by sharing the same nix
store from hut. However, these nodes are now used to build packages
which are not available in hut. Users also ssh to the nodes, which
doesn't mount the hut store, so it doesn't make much sense to keep
mounting it.
2024-07-22 11:02:32 +02:00
f3167c0cc0 Emulate other architectures in owl nodes too
Allows cross-compilation of packages for RISC-V that are known to try to
run RISC-V programs in the host.
2024-07-19 17:55:12 +02:00
d3489f8e48 Program shutdown for August 2nd for all machines 2024-07-18 18:07:12 +02:00
86f5bea6c7 Enable debuginfod daemon in owl nodes
WARNING: This will introduce noise, as the daemon wakes up from time to
time to check for new packages.
2024-07-18 16:12:39 +02:00
accb656c5e Set gitea and grafana log level to warn
Prevents filling the journal logs with information messages.
2024-07-18 15:39:02 +02:00
5e6cf2b563 Set default SLURM job time limit to one hour
Prevents enless jobs from being left forever, while allow users to
request a larger time limit.
2024-07-18 11:44:01 +02:00
29110d2d54 Allow other jobs to run in unused cores
The current select mechanism was using the memory too as a consumable
resource, which by default only sets 1 MiB per node. As each job already
requests 1 MiB, it prevents other jobs from running.

As we are not really concerned with memory usage, we only use the unused
cores in the select criteria.
2024-07-18 11:19:03 +02:00
32c919d1fc Use authentication tokens for PM GitLab runner
Starting with GitLab 16, there is a new mechanism to authenticate the
runners via authentication tokens, so use it instead.  Older tokens and
runners are also removed, as they are no longer used.

With the new way of managing tokens, both the tags and the locked state
are managed from the GitLab web page.

See: https://docs.gitlab.com/ee/ci/runners/new_creation_workflow.html
2024-07-17 17:41:02 +02:00
dba11ea88a flake.lock: Update
Flake lock file updates:

• Updated input 'agenix':
    'github:ryantm/agenix/1381a759b205dff7a6818733118d02253340fd5e' (2024-04-02)
  → 'github:ryantm/agenix/de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6' (2024-07-09)
• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/6143fc5eeb9c4f00163267708e26191d1e918932' (2024-04-21)
  → 'github:NixOS/nixpkgs/693bc46d169f5af9c992095736e82c3488bf7dbb' (2024-07-14)
2024-07-17 14:50:01 +02:00
e3985b28a0 Allow ptrace to any process of the same user
Allows users to attach GDB to their own processes, without requiring
running the program with GDB from the start.
2024-07-17 13:23:45 +02:00
9fe29b864a Add abonerib user to hut, raccon, owl1 and owl2 2024-07-17 13:23:45 +02:00
3ea7edf950 Grant rpenacob access to owl1 and owl2 nodes 2024-07-17 13:23:45 +02:00
53c200fbc5 Access private repositories via hut SSH proxy 2024-07-17 13:23:45 +02:00
f5ebf43019 Set the default proxy to point to hut 2024-07-17 13:23:29 +02:00
43e61a8da3 Allow incoming traffic to hut proxy 2024-07-17 12:56:59 +02:00
116 changed files with 551 additions and 4109 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,2 @@
*.swp
/result
/misc

View File

@@ -1,46 +0,0 @@
#!/bin/sh
# Trims the jungle repository by moving the website to its own repository and
# removing it from jungle. It also removes big pdf files and kernel
# configurations so the jungle repository is small.
set -e
if [ -e oldjungle -o -e newjungle -o -e website ]; then
echo "remove oldjungle/, newjungle/ and website/ first"
exit 1
fi
# Clone the old jungle repo
git clone gitea@tent:rarias/jungle.git oldjungle
# First split the website into a new repository
mkdir website && git -C website init -b master
git-filter-repo \
--path web \
--subdirectory-filter web \
--source oldjungle \
--target website
# Then remove the website, pdf files and big kernel configs
mkdir newjungle && git -C newjungle init -b master
git-filter-repo \
--invert-paths \
--path web \
--path-glob 'doc*.pdf' \
--path-glob '**/kernel/configs/lockdep' \
--path-glob '**/kernel/configs/defconfig' \
--source oldjungle \
--target newjungle
set -x
du -sh oldjungle newjungle website
# 57M oldjungle
# 2,3M newjungle
# 6,4M website
du -sh --exclude=.git oldjungle newjungle website
# 30M oldjungle
# 700K newjungle
# 3,5M website

34
flake.lock generated
View File

@@ -10,11 +10,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1750173260,
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
"lastModified": 1720546205,
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
"owner": "ryantm",
"repo": "agenix",
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
"type": "github"
},
"original": {
@@ -30,11 +30,11 @@
]
},
"locked": {
"lastModified": 1749650500,
"narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=",
"lastModified": 1713974364,
"narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=",
"ref": "refs/heads/master",
"rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405",
"revCount": 961,
"rev": "de89197a4a7b162db7df9d41c9d07759d87c5709",
"revCount": 937,
"type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs"
},
@@ -51,11 +51,11 @@
]
},
"locked": {
"lastModified": 1744478979,
"narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=",
"lastModified": 1700795494,
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
"owner": "lnl7",
"repo": "nix-darwin",
"rev": "43975d782b418ebf4969e9ccba82466728c2851b",
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
"type": "github"
},
"original": {
@@ -73,11 +73,11 @@
]
},
"locked": {
"lastModified": 1745494811,
"narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=",
"lastModified": 1703113217,
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be",
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
"type": "github"
},
"original": {
@@ -88,16 +88,16 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1752436162,
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
"lastModified": 1720957393,
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.05",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}

View File

@@ -1,6 +1,6 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
agenix.url = "github:ryantm/agenix";
agenix.inputs.nixpkgs.follows = "nixpkgs";
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
@@ -18,7 +18,6 @@ in
{
nixosConfigurations = {
hut = mkConf "hut";
tent = mkConf "tent";
owl1 = mkConf "owl1";
owl2 = mkConf "owl2";
eudy = mkConf "eudy";
@@ -26,9 +25,6 @@ in
bay = mkConf "bay";
lake2 = mkConf "lake2";
raccoon = mkConf "raccoon";
fox = mkConf "fox";
apex = mkConf "apex";
weasel = mkConf "weasel";
};
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {

View File

@@ -2,36 +2,28 @@
# here all the public keys
rec {
hosts = {
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon";
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
};
hostGroup = with hosts; rec {
compute = [ owl1 owl2 fox raccoon ];
playground = [ eudy koro weasel ];
compute = [ owl1 owl2 ];
playground = [ eudy koro ];
storage = [ bay lake2 ];
monitor = [ hut ];
login = [ apex ];
system = storage ++ monitor ++ login;
system = storage ++ monitor;
safe = system ++ compute;
all = safe ++ playground;
};
admins = {
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
};
}

View File

@@ -1,69 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/xeon.nix
../common/ssf/hosts.nix
../module/ceph.nix
../module/hut-substituter.nix
../module/slurm-server.nix
./nfs.nix
./wireguard.nix
];
# Don't install grub MBR for now
boot.loader.grub.device = "nodev";
boot.initrd.kernelModules = [
"megaraid_sas" # For HW RAID
];
environment.systemPackages = with pkgs; [
storcli # To manage HW RAID
];
fileSystems."/home" = {
device = "/dev/disk/by-label/home";
fsType = "ext4";
};
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
networking = {
hostName = "apex";
defaultGateway = "84.88.53.233";
nameservers = [ "8.8.8.8" ];
# Public facing interface
interfaces.eno1.ipv4.addresses = [ {
address = "84.88.53.236";
prefixLength = 29;
} ];
# Internal LAN to our Ethernet switch
interfaces.eno2.ipv4.addresses = [ {
address = "10.0.40.30";
prefixLength = 24;
} ];
# Infiniband over Omnipath switch (disconnected for now)
# interfaces.ibp5s0 = {};
nat = {
enable = true;
internalInterfaces = [ "eno2" ];
externalInterface = "eno1";
};
};
networking.firewall = {
extraCommands = ''
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
# logs. Insert as first position so we also protect SSH.
iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
# Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
'';
};
}

View File

@@ -1,48 +0,0 @@
{ ... }:
{
services.nfs.server = {
enable = true;
lockdPort = 4001;
mountdPort = 4002;
statdPort = 4000;
exports = ''
/home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
/home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
'';
};
networking.firewall = {
# Check with `rpcinfo -p`
extraCommands = ''
# Accept NFS traffic from compute nodes but not from the outside
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Accept NFS traffic from wg0
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
'';
};
}

View File

@@ -1,42 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
ips = [ "10.106.0.30/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgApex.path;
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
peers = [
{
name = "fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.1/32" ];
endpoint = "fox.ac.upc.edu:666";
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
persistentKeepalive = 25;
}
{
name = "raccoon";
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox" ];
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
}

View File

@@ -2,18 +2,13 @@
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
../common/xeon.nix
../module/monitoring.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d";
boot.kernel.sysctl = {
"kernel.yama.ptrace_scope" = lib.mkForce "1";
};
environment.systemPackages = with pkgs; [
ceph
];

View File

@@ -3,7 +3,6 @@
# Includes the basic configuration for an Intel server.
imports = [
./base/agenix.nix
./base/always-power-on.nix
./base/august-shutdown.nix
./base/boot.nix
./base/env.nix

View File

@@ -1,8 +0,0 @@
{
imports = [
../../module/power-policy.nix
];
# Turn on as soon as we have power
power.policy = "always-on";
}

View File

@@ -1,12 +1,12 @@
{
# Shutdown all machines on August 3rd at 22:00, so we can protect the
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
# hardware from spurious electrical peaks on the yearly electrical cut for
# manteinance that starts on August 4th.
systemd.timers.august-shutdown = {
description = "Shutdown on August 3rd for maintenance";
description = "Shutdown on August 2nd for maintenance";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = "*-08-03 22:00:00";
OnCalendar = "*-08-02 11:00:00";
RandomizedDelaySec = "10min";
Unit = "systemd-poweroff.service";
};

View File

@@ -11,6 +11,12 @@
terminal_output --append serial
'';
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS0,115200"
];
boot.kernel.sysctl = {
"kernel.perf_event_paranoid" = lib.mkDefault "-1";

View File

@@ -3,8 +3,8 @@
{
environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns
# From bsckgs overlay
osumb
];
@@ -21,8 +21,6 @@
}
];
environment.enableAllTerminfo = true;
environment.variables = {
EDITOR = "vim";
VISUAL = "vim";

View File

@@ -1,4 +1,4 @@
{ pkgs, lib, ... }:
{ pkgs, ... }:
{
networking = {
@@ -10,14 +10,10 @@
allowedTCPPorts = [ 22 ];
};
# Make sure we use iptables
nftables.enable = lib.mkForce false;
hosts = {
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
"84.88.51.152" = [ "raccoon" ];
"84.88.51.142" = [ "raccoon-ipmi" ];
"192.168.11.12" = [ "bscpm04.bsc.es" ];
"192.168.11.15" = [ "gitlab-internal.bsc.es" ];
};
};
}

View File

@@ -6,8 +6,6 @@
(import ../../../pkgs/overlay.nix)
];
nixpkgs.config.allowUnfree = true;
nix = {
nixPath = [
"nixpkgs=${nixpkgs}"
@@ -25,7 +23,6 @@
trusted-users = [ "@wheel" ];
flake-registry = pkgs.writeText "global-registry.json"
''{"flakes":[],"version":2}'';
keep-outputs = true;
};
gc = {
@@ -35,21 +32,6 @@
};
};
# The nix-gc.service can begin its execution *before* /home is mounted,
# causing it to remove all gcroots considering them as stale, as it cannot
# access the symlink. To prevent this problem, we force the service to wait
# until /home is mounted as well as other remote FS like /ceph.
systemd.services.nix-gc = {
# Start remote-fs.target if not already being started and fail if it fails
# to start. It will also be stopped if the remote-fs.target fails after
# starting successfully.
bindsTo = [ "remote-fs.target" ];
# Wait until remote-fs.target fully starts before starting this one.
after = [ "remote-fs.target"];
# Ensure we can access a remote path inside /home
unitConfig.ConditionPathExists = "/home/Computational";
};
# This value determines the NixOS release from which the default
# settings for stateful data, like file locations and database versions
# on your system were taken. Its perfectly fine and recommended to leave

View File

@@ -8,11 +8,15 @@ in
# Enable the OpenSSH daemon.
services.openssh.enable = true;
# Connect to intranet git hosts via proxy
programs.ssh.extraConfig = ''
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
'';
programs.ssh.knownHosts = hostsKeys // {
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
};
}

View File

@@ -20,7 +20,6 @@
rarias = {
uid = 1880;
isNormalUser = true;
linger = true;
home = "/home/Computational/rarias";
description = "Rodrigo Arias";
group = "Computational";
@@ -40,7 +39,7 @@
home = "/home/Computational/arocanon";
description = "Aleix Roca";
group = "Computational";
extraGroups = [ "wheel" "tracing" ];
extraGroups = [ "wheel" ];
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
@@ -56,7 +55,7 @@
home = "/home/Computational/rpenacob";
description = "Raúl Peñacoba";
group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ];
hosts = [ "owl1" "owl2" "hut" ];
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
@@ -69,10 +68,10 @@
home = "/home/Computational/anavarro";
description = "Antoni Navarro";
group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ];
hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1";
hosts = [ "hut" "raccoon" ];
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
];
};
@@ -82,7 +81,7 @@
home = "/home/Computational/abonerib";
description = "Aleix Boné";
group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ];
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
@@ -95,96 +94,16 @@
home = "/home/Computational/vlopez";
description = "Victor López";
group = "Computational";
hosts = [ "apex" "koro" ];
hosts = [ "koro" ];
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
];
};
dbautist = {
uid = 5649;
isNormalUser = true;
home = "/home/Computational/dbautist";
description = "Dylan Bautista Cases";
group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" ];
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
];
};
dalvare1 = {
uid = 2758;
isNormalUser = true;
home = "/home/Computational/dalvare1";
description = "David Álvarez";
group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ];
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
];
};
varcila = {
uid = 5650;
isNormalUser = true;
home = "/home/Computational/varcila";
description = "Vincent Arcila";
group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ];
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
};
pmartin1 = {
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
uid = 9652;
isNormalUser = true;
home = "/home/Computational/pmartin1";
description = "Pedro J. Martinez-Ferrer";
group = "Computational";
hosts = [ "fox" ];
hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl.";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
];
};
csiringo = {
uid = 9653;
isNormalUser = true;
home = "/home/Computational/csiringo";
description = "Cesare Siringo";
group = "Computational";
hosts = [ ];
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
];
};
acinca = {
uid = 9654;
isNormalUser = true;
home = "/home/Computational/acinca";
description = "Arnau Cinca";
group = "Computational";
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
];
};
};
groups = {
Computational = { gid = 564; };
tracing = { };
};
};
}

View File

@@ -1,10 +0,0 @@
{
# Provides the base system for a xeon node in the SSF rack.
imports = [
./xeon.nix
./ssf/fs.nix
./ssf/hosts.nix
./ssf/hosts-remote.nix
./ssf/net.nix
];
}

View File

@@ -1,9 +0,0 @@
{ pkgs, ... }:
{
networking.hosts = {
# Remote hosts visible from compute nodes
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
networking.hosts = {
# Login
"10.0.40.30" = [ "apex" ];
# Storage
"10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ];
"10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
"10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ];
# Xeon compute
"10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ];
"10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ];
"10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
#"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ];
"10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ];
"10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
"10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ];
"10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ];
};
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
};
}

View File

@@ -1,7 +1,9 @@
{
# Provides the base system for a xeon node, not necessarily in the SSF rack.
# Provides the base system for a xeon node.
imports = [
./base.nix
./xeon/console.nix
./xeon/fs.nix
./xeon/getty.nix
./xeon/net.nix
];
}

View File

@@ -5,10 +5,4 @@
wantedBy = [ "getty.target" ];
serviceConfig.Restart = "always";
};
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS0,115200"
];
}

90
m/common/xeon/net.nix Normal file
View File

@@ -0,0 +1,90 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
proxy = {
default = "http://hut:23080/";
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
# https://github.com/golang/go/issues/16715
allProxy = null;
};
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
extraHosts = ''
10.0.40.30 ssfhead
# Node Entry for node: mds01 (ID=72)
10.0.40.40 bay mds01 mds01-eth0
10.0.42.40 bay-ib mds01-ib0
10.0.40.141 bay-ipmi mds01-ipmi0
# Node Entry for node: oss01 (ID=73)
10.0.40.41 oss01 oss01-eth0
10.0.42.41 oss01-ib0
10.0.40.142 oss01-ipmi0
# Node Entry for node: oss02 (ID=74)
10.0.40.42 lake2 oss02 oss02-eth0
10.0.42.42 lake2-ib oss02-ib0
10.0.40.143 lake2-ipmi oss02-ipmi0
# Node Entry for node: xeon01 (ID=15)
10.0.40.1 owl1 xeon01 xeon01-eth0
10.0.42.1 owl1-ib xeon01-ib0
10.0.40.101 owl1-ipmi xeon01-ipmi0
# Node Entry for node: xeon02 (ID=16)
10.0.40.2 owl2 xeon02 xeon02-eth0
10.0.42.2 owl2-ib xeon02-ib0
10.0.40.102 owl2-ipmi xeon02-ipmi0
# Node Entry for node: xeon03 (ID=17)
10.0.40.3 xeon03 xeon03-eth0
10.0.42.3 xeon03-ib0
10.0.40.103 xeon03-ipmi0
# Node Entry for node: xeon04 (ID=18)
10.0.40.4 xeon04 xeon04-eth0
10.0.42.4 xeon04-ib0
10.0.40.104 xeon04-ipmi0
# Node Entry for node: xeon05 (ID=19)
10.0.40.5 koro xeon05 xeon05-eth0
10.0.42.5 koro-ib xeon05-ib0
10.0.40.105 koro-ipmi xeon05-ipmi0
# Node Entry for node: xeon06 (ID=20)
10.0.40.6 xeon06 xeon06-eth0
10.0.42.6 xeon06-ib0
10.0.40.106 xeon06-ipmi0
# Node Entry for node: xeon07 (ID=21)
10.0.40.7 hut xeon07 xeon07-eth0
10.0.42.7 hut-ib xeon07-ib0
10.0.40.107 hut-ipmi xeon07-ipmi0
# Node Entry for node: xeon08 (ID=22)
10.0.40.8 eudy xeon08 xeon08-eth0
10.0.42.8 eudy-ib xeon08-ib0
10.0.40.108 eudy-ipmi xeon08-ipmi0
'';
};
}

View File

@@ -2,14 +2,13 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
./kernel/kernel.nix
./cpufreq.nix
./fs.nix
./users.nix
../module/hut-substituter.nix
../module/debuginfod.nix
];

View File

@@ -1,112 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/base.nix
../common/xeon/console.nix
../module/amd-uprof.nix
../module/emulation.nix
../module/nvidia.nix
../module/slurm-client.nix
../module/hut-substituter.nix
./wireguard.nix
];
# Don't turn off on August as UPC has different dates.
# Fox works fine on power cuts.
systemd.timers.august-shutdown.enable = false;
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ];
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
# Use performance for benchmarks
powerManagement.cpuFreqGovernor = "performance";
services.amd-uprof.enable = true;
# Disable NUMA balancing
boot.kernel.sysctl."kernel.numa_balancing" = 0;
# Expose kernel addresses
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
# Disable NMI watchdog to save one hw counter (for AMD uProf)
boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
services.openssh.settings.X11Forwarding = true;
services.fail2ban.enable = true;
networking = {
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
hostName = "fox";
# UPC network (may change over time, use DHCP)
# Public IP configuration:
# - Hostname: fox.ac.upc.edu
# - IP: 147.83.30.141
# - Gateway: 147.83.30.130
# - NetMask: 255.255.255.192
# Private IP configuration for BMC:
# - Hostname: fox-ipmi.ac.upc.edu
# - IP: 147.83.35.27
# - Gateway: 147.83.35.2
# - NetMask: 255.255.255.0
interfaces.enp1s0f0np0.useDHCP = true;
};
# Recommended for new graphics cards
hardware.nvidia.open = true;
# Mount NVME disks
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
# Mount the NFS home
fileSystems."/nfs/home" = {
device = "10.106.0.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Make a /nvme{0,1}/$USER directory for each user.
systemd.services.create-nvme-dirs = let
# Take only normal users in fox
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
]) users);
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@@ -1,53 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgFox.file = ../../secrets/wg-fox.age;
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
# Determines the IP address and subnet of the server's end of the tunnel interface.
ips = [ "10.106.0.1/24" ];
# The port that WireGuard listens to. Must be accessible by the client.
listenPort = 666;
# Path to the private key file.
privateKeyFile = config.age.secrets.wgFox.path;
# Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
peers = [
# List of allowed peers.
{
name = "apex";
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
allowedIPs = [ "10.106.0.30/32" ];
}
{
name = "raccoon";
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
}
];
};
};
networking.hosts = {
"10.106.0.30" = [ "apex" ];
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
networking.firewall = {
extraCommands = ''
# Accept slurm connections to slurmd from apex (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
'';
};
}

View File

@@ -3,12 +3,160 @@ modules:
prober: http
timeout: 5s
http:
proxy_url: "http://127.0.0.1:23080"
skip_resolve_phase_with_proxy: true
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes: [] # Defaults to 2xx
method: GET
http_with_proxy:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
skip_resolve_phase_with_proxy: true
http_with_proxy_and_headers:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
proxy_connect_header:
Proxy-Authorization:
- Bearer token
http_post_2xx:
prober: http
timeout: 5s
http:
method: POST
headers:
Content-Type: application/json
body: '{}'
http_post_body_file:
prober: http
timeout: 5s
http:
method: POST
body_file: "/files/body.txt"
http_basic_auth_example:
prober: http
timeout: 5s
http:
method: POST
headers:
Host: "login.example.com"
basic_auth:
username: "username"
password: "mysecret"
http_2xx_oauth_client_credentials:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes:
- 200
- 201
oauth2:
client_id: "client_id"
client_secret: "client_secret"
token_url: "https://api.example.com/token"
endpoint_params:
grant_type: "client_credentials"
http_custom_ca_example:
prober: http
http:
method: GET
tls_config:
ca_file: "/certs/my_cert.crt"
http_gzip:
prober: http
http:
method: GET
compression: gzip
http_gzip_with_accept_encoding:
prober: http
http:
method: GET
compression: gzip
headers:
Accept-Encoding: gzip
tls_connect:
prober: tcp
timeout: 5s
tcp:
tls: true
tcp_connect_example:
prober: tcp
timeout: 5s
imap_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "OK.*STARTTLS"
- send: ". STARTTLS"
- expect: "OK"
- starttls: true
- send: ". capability"
- expect: "CAPABILITY IMAP4rev1"
smtp_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "^220 ([^ ]+) ESMTP (.+)$"
- send: "EHLO prober\r"
- expect: "^250-STARTTLS"
- send: "STARTTLS\r"
- expect: "^220"
- starttls: true
- send: "EHLO prober\r"
- expect: "^250-AUTH"
- send: "QUIT\r"
irc_banner_example:
prober: tcp
timeout: 5s
tcp:
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"
dns_udp_example:
prober: dns
timeout: 5s
dns:
query_name: "www.prometheus.io"
query_type: "A"
valid_rcodes:
- NOERROR
validate_answer_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
fail_if_all_match_regexp:
- ".*127.0.0.1"
fail_if_not_matches_regexp:
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
fail_if_none_matches_regexp:
- "127.0.0.1"
validate_authority_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
validate_additional_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
dns_soa:
prober: dns
dns:
query_name: "prometheus.io"
query_type: "SOA"
dns_tcp_example:
prober: dns
dns:
transport_protocol: "tcp" # defaults to "udp"
preferred_ip_protocol: "ip4" # defaults to "ip6"
query_name: "www.prometheus.io"

View File

@@ -1,42 +1,27 @@
{ config, pkgs, lib, ... }:
{ config, pkgs, ... }:
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/debuginfod.nix
../module/emulation.nix
../module/slurm-client.nix
./gitlab-runner.nix
./monitoring.nix
./nfs.nix
./slurm-server.nix
./nix-serve.nix
./public-inbox.nix
./gitea.nix
./msmtp.nix
./postgresql.nix
./nginx.nix
./p.nix
#./pxe.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f";
fileSystems = {
"/" = lib.mkForce {
device = "/dev/disk/by-label/nvme";
fsType = "ext4";
neededForBoot = true;
options = [ "noatime" ];
};
"/boot" = lib.mkForce {
device = "/dev/disk/by-label/nixos-boot";
fsType = "ext4";
neededForBoot = true;
};
};
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
networking = {
hostName = "hut";
@@ -54,11 +39,6 @@
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
'';
# Flush all rules and chains on stop so it won't break on start
extraStopCommands = ''
iptables -F
iptables -X
'';
};
};

View File

@@ -1,9 +1,8 @@
{ pkgs, lib, config, ... }:
{
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age;
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age;
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
services.gitlab-runner = {
enable = true;
@@ -22,90 +21,21 @@
"--docker-network-mode host"
];
environmentVariables = {
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
https_proxy = "http://localhost:23080";
http_proxy = "http://localhost:23080";
};
};
in {
# For pm.bsc.es/gitlab
gitlab-pm-shell = common-shell // {
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path;
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
};
gitlab-pm-docker = common-docker // {
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
};
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
tagList = [ "docker" "hut" ];
environmentVariables = {
# We cannot access the hut local interface from docker, so we connect
# to hut directly via the ethernet one.
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
};
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerExtraHosts = [
# Required to pass the proxy via hut
"hut:10.0.40.7"
];
dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat > "$HOME/.ssh/config" << EOF
Host bscpm04.bsc.es gitlab-internal.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p
EOF
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = {
ENV = "/etc/profile";
USER = "root";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
};
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
};
};
};
# DOCKER* chains are useless, override at FORWARD and nixos-fw
networking.firewall.extraCommands = ''
# Don't forward any traffic from docker
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
# Allow incoming traffic from docker to 23080
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
'';
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";

View File

@@ -1,31 +0,0 @@
{ pkgs, config, lib, ... }:
let
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
''
cp ${./gpfs-probe.sh} $out;
chmod +x $out
''
;
in
{
# Use a new user to handle the SSH keys
users.groups.ssh-robot = { };
users.users.ssh-robot = {
description = "SSH Robot";
isNormalUser = true;
home = "/var/lib/ssh-robot";
};
systemd.services.gpfs-probe = {
description = "Daemon to report GPFS latency via SSH";
path = [ pkgs.openssh pkgs.netcat ];
after = [ "network.target" ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
User = "ssh-robot";
Group = "ssh-robot";
};
};
}

View File

@@ -1,18 +0,0 @@
#!/bin/sh
N=500
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
if [ -z "$t" ]; then
t="5.00"
fi
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP gpfs_touch_latency Time to create $N files.
# TYPE gpfs_touch_latency gauge
gpfs_touch_latency $t
EOF

13
m/hut/ipmi.yml Normal file
View File

@@ -0,0 +1,13 @@
modules:
default:
collectors:
- bmc
- ipmi
- chassis
lan:
collectors:
- ipmi
- chassis
user: ""
pass: ""

View File

@@ -1,13 +1,7 @@
{ config, lib, ... }:
{
imports = [
../module/slurm-exporter.nix
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
./gpfs-probe.nix
../module/nix-daemon-exporter.nix
];
imports = [ ../module/slurm-exporter.nix ];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
@@ -15,8 +9,6 @@
mode = "400";
};
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
services.grafana = {
enable = true;
settings = {
@@ -49,7 +41,7 @@
services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
retentionTime = "1y";
listenAddress = "127.0.0.1";
};
@@ -78,13 +70,13 @@
enable = true;
group = "root";
user = "root";
configFile = config.age.secrets.ipmiYml.path;
# extraFlags = [ "--log.level=debug" ];
configFile = ./ipmi.yml;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "systemd" "logind" ];
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
@@ -110,10 +102,6 @@
"127.0.0.1:9252"
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
"127.0.0.1:9341" # Slurm exporter
"127.0.0.1:9966" # GPFS custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
];
}];
@@ -169,9 +157,6 @@
"8.8.8.8"
"ssfhead"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"arenys5.ac.upc.edu"
];
}];
relabel_configs = [
@@ -217,7 +202,7 @@
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
separator = ";";
regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end
regex = "(.*)";
target_label = "instance";
replacement = "\${1}";
action = "replace";
@@ -259,14 +244,6 @@
module = [ "raccoon" ];
};
}
{
job_name = "raccoon";
static_configs = [
{
targets = [ "127.0.0.1:19002" ]; # Node exporter
}
];
}
];
};
}

View File

@@ -1,76 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /ceph/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /ceph/p/;
}
'';
};
};
}

View File

@@ -1,43 +0,0 @@
{ pkgs, lib, config, ... }:
let
p = pkgs.writeShellScriptBin "p" ''
set -e
cd /ceph
pastedir="p/$USER"
mkdir -p "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "https://jungle.bsc.es/$out"
'';
in
{
environment.systemPackages = with pkgs; [ p ];
# Make sure we have a directory per user. We cannot use the nice
# systemd-tmpfiles-setup.service service because this is a remote FS, and it
# may not be mounted when it runs.
systemd.services.create-paste-dirs = let
# Take only normal users in hut
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
]) users);
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "remote-fs.target" ];
after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
}

7
m/hut/slurm-server.nix Normal file
View File

@@ -0,0 +1,7 @@
{ ... }:
{
services.slurm = {
server.enable = true;
};
}

View File

@@ -1,15 +1,15 @@
- targets:
- owl1-ipmi
- owl2-ipmi
- xeon03-ipmi
- xeon04-ipmi
- koro-ipmi
- weasel-ipmi
- hut-ipmi
- eudy-ipmi
- 10.0.40.101
- 10.0.40.102
- 10.0.40.103
- 10.0.40.104
- 10.0.40.105
- 10.0.40.106
- 10.0.40.107
- 10.0.40.108
# Storage
- bay-ipmi
- oss01-ipmi
- lake2-ipmi
- 10.0.40.141
- 10.0.40.142
- 10.0.40.143
labels:
job: ipmi-lan

View File

@@ -2,7 +2,7 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
../eudy/cpufreq.nix

View File

@@ -2,17 +2,12 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/monitoring.nix
../module/hut-substituter.nix
];
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
boot.kernel.sysctl = {
"kernel.yama.ptrace_scope" = lib.mkForce "1";
};
environment.systemPackages = with pkgs; [
ceph
];

View File

@@ -1,70 +0,0 @@
{
# In physical order from top to bottom (see note below)
ssf = {
# Switches for Ethernet and OmniPath
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
switch-opa = { pos=41; size=1; };
# SSF login
apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
# Storage
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
# Compute xeon
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
# Slot 34 empty
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
# 16 KNL nodes, 4 per chassis
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
# Slot 19 empty
# EPI (hw team, guessed order)
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
# These are old and decommissioned (off)
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
};
bsc2218 = {
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
};
upc = {
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
};
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
# bottom. Example:
#
# | ... | - [pos+size] <--- Label in chassis
# +--------+
# | node | - [pos+1]
# | 2U | - [pos]
# +------- +
# | ... | - [pos-1]
#
# NOTE: The board and sn refers to the FRU information (Board Product and
# Board Serial) via `ipmitool fru print 0`.
}

View File

@@ -1,49 +0,0 @@
{ config, lib, pkgs, ... }:
{
options = {
services.amd-uprof = {
enable = lib.mkOption {
type = lib.types.bool;
default = false;
description = "Whether to enable AMD uProf.";
};
};
};
# Only setup amd-uprof if enabled
config = lib.mkIf config.services.amd-uprof.enable {
# First make sure that we add the module to the list of available modules
# in the kernel matching the same kernel version of this configuration.
boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ];
boot.kernelModules = [ "AMDPowerProfiler" ];
# Make the userspace tools available in $PATH.
environment.systemPackages = with pkgs; [ amd-uprof ];
# The AMDPowerProfiler module doesn't create the /dev device nor it emits
# any uevents, so we cannot use udev rules to automatically create the
# device. Instead, we run a systemd unit that does it after loading the
# modules.
systemd.services.amd-uprof-device = {
description = "Create /dev/AMDPowerProfiler device";
after = [ "systemd-modules-load.service" ];
wantedBy = [ "multi-user.target" ];
unitConfig.ConditionPathExists = [
"/proc/AMDPowerProfiler/device"
"!/dev/AMDPowerProfiler"
];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
'';
ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
rm -f /dev/AMDPowerProfiler
'';
};
};
};
}

View File

@@ -1,13 +0,0 @@
{ config, ... }:
{
nix.settings =
# Don't add hut as a cache to itself
assert config.networking.hostName != "hut";
{
extra-substituters = [ "http://hut/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
# Set a low timeout in case hut is down
connect-timeout = 3; # seconds
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-meteocat-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
};
};
}

View File

@@ -1,26 +0,0 @@
#!/bin/sh
# Locate nix daemon pid
nd=$(pgrep -o nix-daemon)
# Locate children of nix-daemon
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
# For each children, locate 2nd level children
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP nix_daemon_build Nix daemon derivation build state.
# TYPE nix_daemon_build gauge
EOF
for pid in $pids2; do
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
user=$(ps -o uname= -p "$pid")
if [ -n "$name" -a -n "$user" ]; then
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
fi
done

View File

@@ -1,23 +0,0 @@
{ pkgs, config, lib, ... }:
let
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
''
cp ${./nix-daemon-builds.sh} $out;
chmod +x $out
''
;
in
{
systemd.services.nix-daemon-exporter = {
description = "Daemon to export nix-daemon metrics";
path = [ pkgs.procps pkgs.ripgrep ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
# Needed root to read the environment, potentially unsafe
User = "root";
Group = "root";
};
};
}

View File

@@ -1,20 +0,0 @@
{ lib, config, pkgs, ... }:
{
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
hardware.graphics.enable = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# enable support for derivations which require nvidia-gpu to be available
# > requiredSystemFeatures = [ "cuda" ];
programs.nix-required-mounts.enable = true;
programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
# They forgot to add the symlink
programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
];
environment.systemPackages = [ pkgs.cudainfo ];
}

View File

@@ -1,68 +0,0 @@
{ config, lib, pkgs, ... }:
let
cfg = config.services.p;
in
{
options = {
services.p = {
enable = lib.mkOption {
type = lib.types.bool;
default = false;
description = "Whether to enable the p service.";
};
path = lib.mkOption {
type = lib.types.str;
default = "/var/lib/p";
description = "Where to save the pasted files on disk.";
};
url = lib.mkOption {
type = lib.types.str;
default = "https://jungle.bsc.es/p";
description = "URL prefix for the printed file.";
};
};
};
config = lib.mkIf cfg.enable {
environment.systemPackages = let
p = pkgs.writeShellScriptBin "p" ''
set -e
pastedir="${cfg.path}/$USER"
cd "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "${cfg.url}/$USER/$out"
'';
in [ p ];
systemd.services.p = let
# Take only normal users
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
# Create a directory for each user
commands = lib.concatLists (lib.mapAttrsToList (_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}"
]) users);
in {
description = "P service setup";
requires = [ "network-online.target" ];
#wants = [ "remote-fs.target" ];
#after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = pkgs.writeShellScript "p-init.sh" (''
install -d -o root -g root -m 0755 ${cfg.path}
'' + (lib.concatLines commands));
};
};
};
}

View File

@@ -1,33 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.power.policy;
in
{
options = {
power.policy = mkOption {
type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
default = null;
description = "Set power policy to use via IPMI.";
};
};
config = mkIf (cfg != null) {
systemd.services."power-policy" = {
description = "Set power policy to use via IPMI";
wantedBy = [ "multi-user.target" ];
unitConfig = {
StartLimitBurst = "10";
StartLimitIntervalSec = "10m";
};
serviceConfig = {
ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
Type = "oneshot";
Restart = "on-failure";
RestartSec = "5s";
};
};
};
}

View File

@@ -1,10 +1,33 @@
{ lib, ... }:
{ config, pkgs, lib, ... }:
{
imports = [
./slurm-common.nix
];
let
suspendProgram = pkgs.writeScript "suspend.sh" ''
#!/usr/bin/env bash
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeScript "resume.sh" ''
#!/usr/bin/env bash
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
systemd.services.slurmd.serviceConfig = {
# Kill all processes in the control group on stop/restart. This will kill
# all the jobs running, so ensure that we only upgrade when the nodes are
@@ -12,13 +35,73 @@
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
KillMode = lib.mkForce "control-group";
# If slurmd fails to contact the control server it will fail, causing the
# node to remain out of service until manually restarted. Always try to
# restart it.
Restart = "always";
RestartSec = "30s";
};
services.slurm.client.enable = true;
services.slurm = {
client.enable = true;
controlMachine = "hut";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=hut
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_CORE
'';
};
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
}

View File

@@ -1,115 +0,0 @@
{ config, pkgs, ... }:
let
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeShellScript "resume.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
services.slurm = {
controlMachine = "apex";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
}

View File

@@ -1,23 +0,0 @@
{ ... }:
{
imports = [
./slurm-common.nix
];
services.slurm.server.enable = true;
networking.firewall = {
extraCommands = ''
# Accept slurm connections to controller from compute nodes
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from compute nodes for srun
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
# Accept slurm connections to controller from fox (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from fox for srun (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
'';
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-upc-qaire-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
};
};
}

View File

@@ -1,35 +0,0 @@
{config, ...}:
{
age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age;
age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age;
services.openvpn.servers = {
# systemctl status openvpn-dac.service
dac = {
config = ''
client
dev tun
proto tcp
remote vpn.ac.upc.edu 1194
remote vpn.ac.upc.edu 80
resolv-retry infinite
nobind
persist-key
persist-tun
ca ${./vpn-dac/ca.crt}
cert ${./vpn-dac/client.crt}
# Only key needs to be secret
key ${config.age.secrets.vpn-dac-client-key.path}
remote-cert-tls server
comp-lzo
verb 3
auth-user-pass ${config.age.secrets.vpn-dac-login.path}
reneg-sec 0
# Only route fox-ipmi
pull-filter ignore "route "
route 147.83.35.27 255.255.255.255
'';
};
};
}

View File

@@ -1,31 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD
VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx
LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk
MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM
Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw
Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE
BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w
KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi
BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB
QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu
ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC
F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D
6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF
LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9
V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx
L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH
LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q
MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL
MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs
b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu
eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE
AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh
Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF
AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1
6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/
oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q
+oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3
4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z
TQQx5NqM7B5jjak5fauR3/oRD8XXsA==
-----END CERTIFICATE-----

View File

@@ -1,100 +0,0 @@
Certificate:
Data:
Version: 3 (0x2)
Serial Number: 2 (0x2)
Signature Algorithm: sha256WithRSAEncryption
Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
Validity
Not Before: Jan 12 12:45:41 2016 GMT
Not After : Jan 12 12:45:41 2046 GMT
Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu
Subject Public Key Info:
Public Key Algorithm: rsaEncryption
Public-Key: (2048 bit)
Modulus:
00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64:
c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6:
56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7:
19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58:
48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83:
dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9:
2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa:
6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5:
b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f:
60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec:
76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa:
dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d:
5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb:
6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e:
fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00:
17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93:
71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55:
f9:41
Exponent: 65537 (0x10001)
X509v3 extensions:
X509v3 Basic Constraints:
CA:FALSE
Netscape Comment:
Easy-RSA Generated Certificate
X509v3 Subject Key Identifier:
1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18
X509v3 Authority Key Identifier:
keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA
DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
serial:91:F5:D7:C3:C0:A6:4E:61
X509v3 Extended Key Usage:
TLS Web Client Authentication
X509v3 Key Usage:
Digital Signature
X509v3 Subject Alternative Name:
DNS:client
Signature Algorithm: sha256WithRSAEncryption
42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea:
3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7:
26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0:
31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1:
01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a:
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4:
02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3:
40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3:
4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9:
1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33:
75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72:
1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33:
87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62:
a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4:
bf:23:f6:7d
-----BEGIN CERTIFICATE-----
MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx
EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK
EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT
G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN
MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X
DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw
EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk
VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB
cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG
A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S
O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL
Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc
n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc
IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl
60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB
o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu
ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD
GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx
CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl
bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1
bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV
BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA
YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud
DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo
ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx
3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk
g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m
lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck
iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU
CzHGPDS3/FoM5L8j9n0=
-----END CERTIFICATE-----

View File

@@ -2,13 +2,12 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/debuginfod.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches

View File

@@ -2,13 +2,12 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/debuginfod.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches

View File

@@ -3,24 +3,11 @@
{
imports = [
../common/base.nix
../common/ssf/hosts.nix
../module/emulation.nix
../module/debuginfod.nix
../module/nvidia.nix
../eudy/kernel/perf.nix
./wireguard.nix
../module/hut-substituter.nix
];
# Don't install Grub on the disk yet
boot.loader.grub.device = "nodev";
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS1,115200"
];
networking = {
hostName = "raccoon";
# Only BSC DNSs seem to be reachable from the office VLAN
@@ -30,41 +17,18 @@
address = "84.88.51.152";
prefixLength = 25;
} ];
interfaces.enp5s0f1.ipv4.addresses = [ {
address = "10.0.44.1";
prefixLength = 24;
} ];
nat = {
enable = true;
internalInterfaces = [ "enp5s0f1" ];
externalInterface = "eno0";
};
hosts = {
"10.0.44.4" = [ "tent" ];
"84.88.53.236" = [ "apex" ];
};
};
# Mount the NFS home
fileSystems."/nfs/home" = {
device = "10.106.0.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Enable performance governor
powerManagement.cpuFreqGovernor = "performance";
hardware.nvidia.open = false; # Maxwell is older than Turing architecture
services.openssh.settings.X11Forwarding = true;
services.prometheus.exporters.node = {
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.opengl = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
driSupport = true;
setLdLibraryPath = true;
};
nixpkgs.config.allowUnfree = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
users.motd = ''

View File

@@ -1,48 +0,0 @@
{ config, pkgs, ... }:
{
networking.nat = {
enable = true;
enableIPv6 = false;
externalInterface = "eno0";
internalInterfaces = [ "wg0" ];
};
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
wg0 = {
ips = [ "10.106.0.236/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgRaccoon.path;
# Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=
peers = [
{
name = "fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.1/32" ];
endpoint = "fox.ac.upc.edu:666";
persistentKeepalive = 25;
}
{
name = "apex";
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ];
endpoint = "ssfhead.bsc.es:666";
persistentKeepalive = 25;
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox.wg" ];
"10.106.0.30" = [ "apex.wg" ];
};
}

View File

@@ -1,14 +0,0 @@
modules:
http_2xx:
prober: http
timeout: 5s
http:
preferred_ip_protocol: "ip4"
follow_redirects: true
valid_status_codes: [] # Defaults to 2xx
method: GET
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"

View File

@@ -1,85 +0,0 @@
{ config, pkgs, lib, ... }:
{
imports = [
../common/xeon.nix
../common/ssf/hosts.nix
../module/emulation.nix
../module/debuginfod.nix
./monitoring.nix
./nginx.nix
./nix-serve.nix
./gitlab-runner.nix
./gitea.nix
../hut/public-inbox.nix
../hut/msmtp.nix
../module/p.nix
../module/vpn-dac.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
networking = {
hostName = "tent";
interfaces.eno1.ipv4.addresses = [
{
address = "10.0.44.4";
prefixLength = 24;
}
];
# Only BSC DNSs seem to be reachable from the office VLAN
nameservers = [ "84.88.52.35" "84.88.52.36" ];
search = [ "bsc.es" "ac.upc.edu" ];
defaultGateway = "10.0.44.1";
hosts = {
"84.88.53.236" = [ "apex" ];
"10.0.44.1" = [ "raccoon" ];
};
};
services.p.enable = true;
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
boot.swraid = {
enable = true;
mdadmConf = ''
DEVICE partitions
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
MAILADDR root
'';
};
fileSystems."/vault" = {
device = "/dev/disk/by-label/vault";
fsType = "ext4";
};
# Make a /vault/$USER directory for each user.
systemd.services.create-vault-dirs = let
# Take only normal users in tent
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}"
]) users);
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# disable automatic garbage collector
nix.gc.automatic = lib.mkForce false;
}

View File

@@ -1,30 +0,0 @@
{ config, lib, ... }:
{
services.gitea = {
enable = true;
appName = "Gitea in the jungle";
settings = {
server = {
ROOT_URL = "https://jungle.bsc.es/git/";
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
LANDING_PAGE = "explore";
};
metrics.ENABLED = true;
service = {
DISABLE_REGISTRATION = true;
REGISTER_MANUAL_CONFIRM = true;
ENABLE_NOTIFY_MAIL = true;
};
log.LEVEL = "Warn";
mailer = {
ENABLED = true;
FROM = "jungle-robot@bsc.es";
PROTOCOL = "sendmail";
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
SENDMAIL_ARGS = "--";
};
};
};
}

View File

@@ -1,93 +0,0 @@
{ pkgs, lib, config, ... }:
{
age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age;
age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age;
age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age;
services.gitlab-runner = let sec = config.age.secrets; in {
enable = true;
settings.concurrent = 5;
services = {
# For gitlab.pm.bsc.es
gitlab-pm-shell = {
executor = "shell";
environmentVariables = {
SHELL = "${pkgs.bash}/bin/bash";
};
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path;
preGetSourcesScript = pkgs.writeScript "setup" ''
echo "This is the preGetSources script running, brace for impact"
env
'';
};
gitlab-pm-docker = {
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path;
executor = "docker";
dockerImage = "debian:stable";
};
# For gitlab.bsc.es
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path;
tagList = [ "docker" "tent" "nix" ];
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = {
ENV = "/etc/profile";
USER = "root";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
};
};
};
};
systemd.services.gitlab-runner.serviceConfig = {
DynamicUser = lib.mkForce false;
User = "gitlab-runner";
Group = "gitlab-runner";
ExecStart = lib.mkForce
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
};
users.users.gitlab-runner = {
uid = config.ids.uids.gitlab-runner;
home = "/var/lib/gitlab-runner";
description = "Gitlab Runner";
group = "gitlab-runner";
extraGroups = [ "docker" ];
createHome = true;
};
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
}

View File

@@ -1,217 +0,0 @@
{ config, lib, pkgs, ... }:
{
imports = [
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
../module/nix-daemon-exporter.nix
];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
owner = "grafana";
mode = "400";
};
services.grafana = {
enable = true;
settings = {
server = {
domain = "jungle.bsc.es";
root_url = "%(protocol)s://%(domain)s/grafana";
serve_from_sub_path = true;
http_port = 2342;
http_addr = "127.0.0.1";
};
smtp = {
enabled = true;
from_address = "jungle-robot@bsc.es";
user = "jungle-robot";
# Read the password from a file, which is only readable by grafana user
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
host = "mail.bsc.es:465";
startTLS_policy = "NoStartTLS";
};
feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
};
};
services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
listenAddress = "127.0.0.1";
};
# We need access to the devices to monitor the disk space
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
# Credentials for IPMI exporter
age.secrets.ipmiYml = {
file = ../../secrets/ipmi.yml.age;
owner = "ipmi-exporter";
};
# Create an IPMI group and assign the ipmi0 device
users.groups.ipmi = {};
services.udev.extraRules = ''
SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660"
'';
# Add a new ipmi-exporter user that can read the ipmi0 device
users.users.ipmi-exporter = {
isSystemUser = true;
group = "ipmi";
};
# Disable dynamic user so we have the ipmi-exporter user available for the credentials
systemd.services.prometheus-ipmi-exporter.serviceConfig = {
DynamicUser = lib.mkForce false;
PrivateDevices = lib.mkForce false;
User = lib.mkForce "ipmi-exporter";
Group = lib.mkForce "ipmi";
RestrictNamespaces = lib.mkForce false;
# Fake uid to 0 so it shuts up
ExecStart = let
cfg = config.services.prometheus.exporters.ipmi;
in lib.mkForce (lib.concatStringsSep " " ([
"${pkgs.util-linux}/bin/unshare --map-user 0"
"${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter"
"--web.listen-address ${cfg.listenAddress}:${toString cfg.port}"
"--config.file ${lib.escapeShellArg cfg.configFile}"
] ++ cfg.extraFlags));
};
services.prometheus = {
exporters = {
ipmi = {
enable = true;
configFile = config.age.secrets.ipmiYml.path;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "logind" ];
port = 9002;
listenAddress = "127.0.0.1";
};
blackbox = {
enable = true;
listenAddress = "127.0.0.1";
configFile = ./blackbox.yml;
};
};
scrapeConfigs = [
{
job_name = "local";
static_configs = [{
targets = [
"127.0.0.1:9002" # Node exporter
#"127.0.0.1:9115" # Blackbox exporter
"127.0.0.1:9290" # IPMI exporter for local node
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
];
}];
}
{
job_name = "blackbox-http";
metrics_path = "/probe";
params = { module = [ "http_2xx" ]; };
static_configs = [{
targets = [
"https://www.google.com/robots.txt"
"https://pm.bsc.es/"
"https://pm.bsc.es/gitlab/"
"https://jungle.bsc.es/"
"https://gitlab.bsc.es/"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "blackbox-icmp";
metrics_path = "/probe";
params = { module = [ "icmp" ]; };
static_configs = [{
targets = [
"1.1.1.1"
"8.8.8.8"
"ssfhead"
"raccoon"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"fox-ipmi.ac.upc.edu"
"arenys5.ac.upc.edu"
"arenys0-2.ac.upc.edu"
"epi01.bsc.es"
"axle.bsc.es"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "ipmi-raccoon";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "raccoon-ipmi" ];
module = [ "raccoon" ];
};
}
{
job_name = "ipmi-fox";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "fox-ipmi.ac.upc.edu" ];
module = [ "fox" ];
};
}
];
};
}

View File

@@ -1,79 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /vault/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /var/lib/p/;
}
location /pub/ {
alias /vault/pub/;
}
'';
};
};
}

View File

@@ -1,16 +0,0 @@
{ config, ... }:
{
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
services.nix-serve = {
enable = true;
# Only listen locally, as we serve it via ssh
bindAddress = "127.0.0.1";
port = 5000;
secretKeyFile = config.age.secrets.nixServe.path;
# Public key:
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
};
}

View File

@@ -1,33 +0,0 @@
{ lib, ... }:
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
];
# Select this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
# Users with sudo access
users.groups.wheel.members = [ "abonerib" "anavarro" ];
# Run julia installed with juliaup using julia's own libraries:
# NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
programs.nix-ld.enable = true;
networking = {
hostName = "weasel";
interfaces.eno1.ipv4.addresses = [ {
address = "10.0.40.6";
prefixLength = 24;
} ];
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.6";
prefixLength = 24;
} ];
};
}

View File

@@ -1,89 +0,0 @@
{ stdenv
, lib
, curl
, cacert
, runCommandLocal
, autoPatchelfHook
, elfutils
, glib
, libGL
, ncurses5
, xorg
, zlib
, libxkbcommon
, freetype
, fontconfig
, libGLU
, dbus
, rocmPackages
, libxcrypt-legacy
, numactl
, radare2
}:
let
version = "5.1.701";
tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";
# NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes.
uprofSrc = runCommandLocal tarball {
nativeBuildInputs = [ curl ];
outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
} ''
curl \
-o $out \
'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
-H 'Accept-Language: en-US,en;q=0.5' \
-H 'Accept-Encoding: gzip, deflate, br, zstd' \
-H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
'';
in
stdenv.mkDerivation {
pname = "AMD-uProf";
inherit version;
src = uprofSrc;
dontStrip = true;
phases = [ "installPhase" "fixupPhase" ];
nativeBuildInputs = [ autoPatchelfHook radare2 ];
buildInputs = [
stdenv.cc.cc.lib
ncurses5
elfutils
glib
libGL
libGLU
libxcrypt-legacy
xorg.libX11
xorg.libXext
xorg.libXi
xorg.libXmu
xorg.libxcb
xorg.xcbutilwm
xorg.xcbutilrenderutil
xorg.xcbutilkeysyms
xorg.xcbutilimage
fontconfig.lib
libxkbcommon
zlib
freetype
dbus
rocmPackages.rocprofiler
numactl
];
installPhase = ''
set -x
mkdir -p $out
tar -x -v -C $out --strip-components=1 -f $src
rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c -
radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm
patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm
set +x
'';
}

View File

@@ -1,33 +0,0 @@
{ stdenv
, lib
, amd-uprof
, kernel
, runCommandLocal
}:
let
version = amd-uprof.version;
tarball = amd-uprof.src;
in stdenv.mkDerivation {
pname = "AMDPowerProfilerDriver";
inherit version;
src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
set -x
tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz
mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out
set +x
'';
hardeningDisable = [ "pic" "format" ];
nativeBuildInputs = kernel.moduleBuildDependencies;
patches = [ ./makefile.patch ./hrtimer.patch ];
makeFlags = [
"KERNEL_VERSION=${kernel.modDirVersion}"
"KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
"INSTALL_MOD_PATH=$(out)"
];
meta = {
description = "AMD Power Profiler Driver";
homepage = "https://www.amd.com/es/developer/uprof.html";
platforms = lib.platforms.linux;
};
}

View File

@@ -1,31 +0,0 @@
--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200
+++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200
@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
DRVPRINT("pTimerConfig(%p)", pTimerConfig);
- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
}
int PmcSetupTimer(ClientContext* pClientCtx)
@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
{
/* Interval in ms */
pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
- pTimerConfig->m_hrTimer.function = PmcTimerCallback;
DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
}
--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200
+++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200
@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
pCoreClientData = &per_cpu(g_coreClientData, cpu);
// initialize HR timer
- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- pCoreClientData->m_hrTimer.function = &HrTimerCallback;
+ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
return;
} // InitHrTimer

View File

@@ -1,10 +0,0 @@
# Patch arguments to call sym std::string::find(char const*, unsigned long, unsigned long)
# so it matches NixOS:
#
# Change OS name to NixOS
wz NixOS @ 0x00550a43
# And set the length to 5 characters
wa mov ecx, 5 @0x00517930
#
# Then change the argument to dlopen() so it only uses libnuma.so
wz libnuma.so @ 0x00562940

View File

@@ -1,66 +0,0 @@
--- a/Makefile 2025-06-19 20:36:49.346693267 +0200
+++ b/Makefile 2025-06-19 20:42:29.778088660 +0200
@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
MODULE_NAME_KO=$(MODULE_NAME).ko
# check is module inserted
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
# check pcore dkms status
PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
@@ -50,7 +50,7 @@ endif
# “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
G_VERSION=9
K_VERSION=5
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe
# make
all:
- @chmod a+x ./AMDPPcert.sh
- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS1 -eq 1 ]; then \
- exit 1; \
- fi
- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS3 -eq 0 ]; then \
- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
- fi
+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
# make clean
clean:
@@ -84,23 +74,9 @@ clean:
# make install
install:
- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
- @depmod -a
- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
- rmmod $(MODULE_NAME);\
- fi
- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS1 -eq 0 ]; then \
- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
- exit 1; \
- else \
- cat $(PWD)/sign_status1; \
- fi
+ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
+ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
+
# make dkms
dkms:
@chmod a+x ./AMDPPcert.sh

View File

@@ -1,12 +0,0 @@
HOSTCXX ?= g++
NVCC := nvcc -ccbin $(HOSTCXX)
CXXFLAGS := -m64
# Target rules
all: cudainfo
cudainfo: cudainfo.cpp
$(NVCC) $(CXXFLAGS) -o $@ $<
clean:
rm -f cudainfo cudainfo.o

View File

@@ -1,600 +0,0 @@
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
// Shared Utilities (QA Testing)
// std::system includes
#include <memory>
#include <iostream>
#include <cuda_runtime.h>
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
static const char *_cudaGetErrorEnum(cudaError_t error)
{
switch (error)
{
case cudaSuccess:
return "cudaSuccess";
case cudaErrorMissingConfiguration:
return "cudaErrorMissingConfiguration";
case cudaErrorMemoryAllocation:
return "cudaErrorMemoryAllocation";
case cudaErrorInitializationError:
return "cudaErrorInitializationError";
case cudaErrorLaunchFailure:
return "cudaErrorLaunchFailure";
case cudaErrorPriorLaunchFailure:
return "cudaErrorPriorLaunchFailure";
case cudaErrorLaunchTimeout:
return "cudaErrorLaunchTimeout";
case cudaErrorLaunchOutOfResources:
return "cudaErrorLaunchOutOfResources";
case cudaErrorInvalidDeviceFunction:
return "cudaErrorInvalidDeviceFunction";
case cudaErrorInvalidConfiguration:
return "cudaErrorInvalidConfiguration";
case cudaErrorInvalidDevice:
return "cudaErrorInvalidDevice";
case cudaErrorInvalidValue:
return "cudaErrorInvalidValue";
case cudaErrorInvalidPitchValue:
return "cudaErrorInvalidPitchValue";
case cudaErrorInvalidSymbol:
return "cudaErrorInvalidSymbol";
case cudaErrorMapBufferObjectFailed:
return "cudaErrorMapBufferObjectFailed";
case cudaErrorUnmapBufferObjectFailed:
return "cudaErrorUnmapBufferObjectFailed";
case cudaErrorInvalidHostPointer:
return "cudaErrorInvalidHostPointer";
case cudaErrorInvalidDevicePointer:
return "cudaErrorInvalidDevicePointer";
case cudaErrorInvalidTexture:
return "cudaErrorInvalidTexture";
case cudaErrorInvalidTextureBinding:
return "cudaErrorInvalidTextureBinding";
case cudaErrorInvalidChannelDescriptor:
return "cudaErrorInvalidChannelDescriptor";
case cudaErrorInvalidMemcpyDirection:
return "cudaErrorInvalidMemcpyDirection";
case cudaErrorAddressOfConstant:
return "cudaErrorAddressOfConstant";
case cudaErrorTextureFetchFailed:
return "cudaErrorTextureFetchFailed";
case cudaErrorTextureNotBound:
return "cudaErrorTextureNotBound";
case cudaErrorSynchronizationError:
return "cudaErrorSynchronizationError";
case cudaErrorInvalidFilterSetting:
return "cudaErrorInvalidFilterSetting";
case cudaErrorInvalidNormSetting:
return "cudaErrorInvalidNormSetting";
case cudaErrorMixedDeviceExecution:
return "cudaErrorMixedDeviceExecution";
case cudaErrorCudartUnloading:
return "cudaErrorCudartUnloading";
case cudaErrorUnknown:
return "cudaErrorUnknown";
case cudaErrorNotYetImplemented:
return "cudaErrorNotYetImplemented";
case cudaErrorMemoryValueTooLarge:
return "cudaErrorMemoryValueTooLarge";
case cudaErrorInvalidResourceHandle:
return "cudaErrorInvalidResourceHandle";
case cudaErrorNotReady:
return "cudaErrorNotReady";
case cudaErrorInsufficientDriver:
return "cudaErrorInsufficientDriver";
case cudaErrorSetOnActiveProcess:
return "cudaErrorSetOnActiveProcess";
case cudaErrorInvalidSurface:
return "cudaErrorInvalidSurface";
case cudaErrorNoDevice:
return "cudaErrorNoDevice";
case cudaErrorECCUncorrectable:
return "cudaErrorECCUncorrectable";
case cudaErrorSharedObjectSymbolNotFound:
return "cudaErrorSharedObjectSymbolNotFound";
case cudaErrorSharedObjectInitFailed:
return "cudaErrorSharedObjectInitFailed";
case cudaErrorUnsupportedLimit:
return "cudaErrorUnsupportedLimit";
case cudaErrorDuplicateVariableName:
return "cudaErrorDuplicateVariableName";
case cudaErrorDuplicateTextureName:
return "cudaErrorDuplicateTextureName";
case cudaErrorDuplicateSurfaceName:
return "cudaErrorDuplicateSurfaceName";
case cudaErrorDevicesUnavailable:
return "cudaErrorDevicesUnavailable";
case cudaErrorInvalidKernelImage:
return "cudaErrorInvalidKernelImage";
case cudaErrorNoKernelImageForDevice:
return "cudaErrorNoKernelImageForDevice";
case cudaErrorIncompatibleDriverContext:
return "cudaErrorIncompatibleDriverContext";
case cudaErrorPeerAccessAlreadyEnabled:
return "cudaErrorPeerAccessAlreadyEnabled";
case cudaErrorPeerAccessNotEnabled:
return "cudaErrorPeerAccessNotEnabled";
case cudaErrorDeviceAlreadyInUse:
return "cudaErrorDeviceAlreadyInUse";
case cudaErrorProfilerDisabled:
return "cudaErrorProfilerDisabled";
case cudaErrorProfilerNotInitialized:
return "cudaErrorProfilerNotInitialized";
case cudaErrorProfilerAlreadyStarted:
return "cudaErrorProfilerAlreadyStarted";
case cudaErrorProfilerAlreadyStopped:
return "cudaErrorProfilerAlreadyStopped";
/* Since CUDA 4.0*/
case cudaErrorAssert:
return "cudaErrorAssert";
case cudaErrorTooManyPeers:
return "cudaErrorTooManyPeers";
case cudaErrorHostMemoryAlreadyRegistered:
return "cudaErrorHostMemoryAlreadyRegistered";
case cudaErrorHostMemoryNotRegistered:
return "cudaErrorHostMemoryNotRegistered";
/* Since CUDA 5.0 */
case cudaErrorOperatingSystem:
return "cudaErrorOperatingSystem";
case cudaErrorPeerAccessUnsupported:
return "cudaErrorPeerAccessUnsupported";
case cudaErrorLaunchMaxDepthExceeded:
return "cudaErrorLaunchMaxDepthExceeded";
case cudaErrorLaunchFileScopedTex:
return "cudaErrorLaunchFileScopedTex";
case cudaErrorLaunchFileScopedSurf:
return "cudaErrorLaunchFileScopedSurf";
case cudaErrorSyncDepthExceeded:
return "cudaErrorSyncDepthExceeded";
case cudaErrorLaunchPendingCountExceeded:
return "cudaErrorLaunchPendingCountExceeded";
case cudaErrorNotPermitted:
return "cudaErrorNotPermitted";
case cudaErrorNotSupported:
return "cudaErrorNotSupported";
/* Since CUDA 6.0 */
case cudaErrorHardwareStackError:
return "cudaErrorHardwareStackError";
case cudaErrorIllegalInstruction:
return "cudaErrorIllegalInstruction";
case cudaErrorMisalignedAddress:
return "cudaErrorMisalignedAddress";
case cudaErrorInvalidAddressSpace:
return "cudaErrorInvalidAddressSpace";
case cudaErrorInvalidPc:
return "cudaErrorInvalidPc";
case cudaErrorIllegalAddress:
return "cudaErrorIllegalAddress";
/* Since CUDA 6.5*/
case cudaErrorInvalidPtx:
return "cudaErrorInvalidPtx";
case cudaErrorInvalidGraphicsContext:
return "cudaErrorInvalidGraphicsContext";
case cudaErrorStartupFailure:
return "cudaErrorStartupFailure";
case cudaErrorApiFailureBase:
return "cudaErrorApiFailureBase";
}
return "<unknown>";
}
#endif
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
cudaDeviceReset();
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
int *pArgc = NULL;
char **pArgv = NULL;
#if CUDART_VERSION < 5000
// CUDA-C includes
#include <cuda.h>
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
if (CUDA_SUCCESS != error) {
fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
error, __FILE__, __LINE__);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(EXIT_FAILURE);
}
}
#endif /* CUDART_VERSION < 5000 */
// Beginning of GPU Architecture definitions
inline int ConvertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
pArgc = &argc;
pArgv = argv;
printf("%s Starting...\n\n", argv[0]);
printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
int deviceCount = 0;
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
if (error_id != cudaSuccess) {
printf("cudaGetDeviceCount failed: %s (%d)\n",
cudaGetErrorString(error_id), (int) error_id);
printf("Result = FAIL\n");
exit(EXIT_FAILURE);
}
// This function call returns 0 if there are no CUDA capable devices.
if (deviceCount == 0)
printf("There are no available device(s) that support CUDA\n");
else
printf("Detected %d CUDA Capable device(s)\n", deviceCount);
int dev, driverVersion = 0, runtimeVersion = 0;
for (dev = 0; dev < deviceCount; ++dev) {
cudaSetDevice(dev);
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
// Console log
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
(float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
deviceProp.multiProcessorCount,
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 5000
// This is supported in CUDA 5.0 (runtime API device properties)
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
if (deviceProp.l2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
}
#else
// This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
int memoryClock;
getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
int memBusWidth;
getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
int L2CacheSize;
getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
if (L2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
}
#endif
printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
printf(" Warp size: %d\n", deviceProp.warpSize);
printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
#endif
printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
const char *sComputeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
"Unknown",
NULL
};
printf(" Compute Mode:\n");
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
}
// If there are 2 or more GPUs, query to determine whether RDMA is supported
if (deviceCount >= 2)
{
cudaDeviceProp prop[64];
int gpuid[64]; // we want to find the first two GPU's that can support P2P
int gpu_p2p_count = 0;
for (int i=0; i < deviceCount; i++)
{
checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
// Only boards based on Fermi or later can support P2P
if ((prop[i].major >= 2)
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
&& prop[i].tccDriver
#endif
)
{
// This is an array of P2P capable GPUs
gpuid[gpu_p2p_count++] = i;
}
}
// Show all the combinations of support P2P GPUs
int can_access_peer_0_1, can_access_peer_1_0;
if (gpu_p2p_count >= 2)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
for (int j = 1; j < gpu_p2p_count; j++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
prop[gpuid[j]].name, gpuid[j] ,
can_access_peer_0_1 ? "Yes" : "No");
}
}
for (int j = 1; j < gpu_p2p_count; j++)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
prop[gpuid[i]].name, gpuid[i] ,
can_access_peer_1_0 ? "Yes" : "No");
}
}
}
}
// csv masterlog info
// *****************************
// exe and CUDA driver name
printf("\n");
std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
char cTemp[128];
// driver version
sProfileString += ", CUDA Driver Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#endif
sProfileString += cTemp;
// Runtime version
sProfileString += ", CUDA Runtime Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#endif
sProfileString += cTemp;
// Device count
sProfileString += ", NumDevs = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d", deviceCount);
#else
sprintf(cTemp, "%d", deviceCount);
#endif
sProfileString += cTemp;
// Print Out all device Names
for (dev = 0; dev < deviceCount; ++dev)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 13, ", Device%d = ", dev);
#else
sprintf(cTemp, ", Device%d = ", dev);
#endif
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
sProfileString += cTemp;
sProfileString += deviceProp.name;
}
sProfileString += "\n";
printf("%s", sProfileString.c_str());
printf("Result = PASS\n");
// finish
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
return 0;
}

View File

@@ -1,43 +0,0 @@
{
stdenv
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
, strace
}:
stdenv.mkDerivation (finalAttrs: {
name = "cudainfo";
src = ./.;
buildInputs = [
cudatoolkit # Required for nvcc
cudaPackages.cuda_cudart.static # Required for -lcudart_static
autoAddDriverRunpath
];
installPhase = ''
mkdir -p $out/bin
cp -a cudainfo $out/bin
'';
passthru.gpuCheck = stdenv.mkDerivation {
name = "cudainfo-test";
requiredSystemFeatures = [ "cuda" ];
dontBuild = true;
nativeCheckInputs = [
finalAttrs.finalPackage # The cudainfo package from above
strace # When it fails, it will show the trace
];
dontUnpack = true;
doCheck = true;
checkPhase = ''
if ! cudainfo; then
set -x
cudainfo=$(command -v cudainfo)
ldd $cudainfo
readelf -d $cudainfo
strace -f $cudainfo
set +x
fi
'';
installPhase = "touch $out";
};
})

View File

@@ -1,25 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "meteocat-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
beautifulsoup4
lxml
prometheus-client
];
meta = with lib; {
description = "MeteoCat Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
from bs4 import BeautifulSoup
from urllib import request
# Configuration -------------------------------------------
meteo_station = "X8" # Barcelona - Zona Universitària
listening_port = 9929
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
def update(st):
url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
response = request.urlopen(url)
data = response.read()
soup = BeautifulSoup(data, 'lxml')
table = soup.find("table", {"class" : "tblperiode"})
rows = table.find_all('tr')
row = rows[-1] # Take the last row
row_data = []
header = row.find('th')
header_text = header.text.strip()
row_data.append(header_text)
for col in row.find_all('td'):
row_data.append(col.text)
try:
# Sometimes it will return '(s/d)' and fail to parse
metric_tavg.set(float(row_data[1]))
metric_tmax.set(float(row_data[2]))
metric_tmin.set(float(row_data[3]))
metric_srad.set(float(row_data[10]))
#print("ok: temp_avg={}".format(float(row_data[1])))
except:
print("cannot parse row: {}".format(row))
metric_tavg.set(float("nan"))
metric_tmax.set(float("nan"))
metric_tmin.set(float("nan"))
metric_srad.set(float("nan"))
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
update(meteo_station)
except:
print("update failed")
time.sleep(update_period)

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='meteocat-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["meteocat-exporter"],
)

View File

@@ -0,0 +1,36 @@
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
index 33e88bc..ee3641c 100644
--- a/src/util/mpir_hwtopo.c
+++ b/src/util/mpir_hwtopo.c
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
#ifdef HAVE_HWLOC
bindset = hwloc_bitmap_alloc();
hwloc_topology_init(&hwloc_topology);
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
- if (xmlfile != NULL) {
- int rc;
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
- if (rc == 0) {
- /* To have hwloc still actually call OS-specific hooks, the
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
- * file is really the underlying system. */
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
- }
- MPL_free(xmlfile);
- }
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
if (!hwloc_topology_load(hwloc_topology))
--- a/src/mpi/init/local_proc_attrs.c
+++ b/src/mpi/init/local_proc_attrs.c
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
/* Set the number of tag bits. The device may override this value. */
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
- MPL_free(requested_kinds);
-
return mpi_errno;
}

View File

@@ -11,6 +11,10 @@ final: prev:
paths = [ pmix.dev pmix.out ];
};
in prev.mpich.overrideAttrs (old: {
patches = [
# See https://github.com/pmodels/mpich/issues/6946
./mpich-fix-hwtopo.patch
];
buildInput = old.buildInputs ++ [
libfabric
pmixAll
@@ -35,33 +39,7 @@ final: prev:
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
./slurm-rank-expansion.patch
];
# Install also the pam_slurm_adopt library to restrict users from accessing
# nodes with no job allocated.
postBuild = (old.postBuild or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security"
popd
'';
postInstall = (old.postInstall or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security" install
popd
'';
});
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
cudainfo = prev.callPackage ./cudainfo/default.nix { };
amd-uprof = prev.callPackage ./amd-uprof/default.nix { };
# FIXME: Extend this to all linuxPackages variants. Open problem, see:
# https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
linuxPackages = prev.linuxPackages.extend (_final: _prev: {
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
});
linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
});
}

View File

@@ -1,24 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "upc-qaire-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
prometheus-client
requests
];
meta = with lib; {
description = "UPC Qaire Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='upc-qaire-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["upc-qaire-exporter"],
)

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
import requests, json
from datetime import datetime, timedelta
# Configuration -------------------------------------------
listening_port = 9928
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
def genparams():
d = {}
d['topic'] = 'TEMPERATURE'
d['shift_dates_to'] = ''
d['datapoints'] = 301
d['devicesAndColors'] = '1148418@@@#40ACB6'
now = datetime.now()
d['fromDate'] = now.strftime('%d/%m/%Y')
d['toDate'] = now.strftime('%d/%m/%Y')
d['serviceFrequency'] = 'NONE'
# WTF!
for i in range(7):
for j in range(48):
key = 'week.days[{}].hours[{}].value'.format(i, j)
d[key] = 'OPEN'
return d
def measure():
# First we need to load session
s = requests.Session()
r = s.get("https://upc.edu/sirena")
if r.status_code != 200:
print("bad HTTP status code on new session: {}".format(r.status_code))
return
if s.cookies.get("JSESSIONID") is None:
print("cannot get JSESSIONID")
return
# Now we can pull the data
url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
r = s.post(url, data=genparams())
if r.status_code != 200:
print("bad HTTP status code on data: {}".format(r.status_code))
return
#print(r.text)
j = json.loads(r.content)
# Just take the last one
last = j['data']['chartElementList'][-1]
temp = last['values']['1148418-Temperatura']
return temp
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
metric_temp.set(measure())
except:
print("measure failed")
metric_temp.set(float("nan"))
time.sleep(update_period)

View File

@@ -1,25 +1,21 @@
age-encryption.org/v1
-> ssh-ed25519 AY8zKw /gmhFOFqOs8IobAImvQVKeM5Y6k0FpuR61/Cu5drVVI
g9FXJg2oIoien0zJ70FWHwSTM8SBwbpS188S3Swj7EM
-> ssh-ed25519 sgAamA opPjlWPhSiI0Rd5l7kd204S5FXFLcQcQftyKb7MDmnU
3XrRDVnglCP+vBwvfd1rP5gHttsGDHyXwbf10a8/kKY
-> ssh-ed25519 HY2yRg QKZbubM76C3tobPoyCFDRclA9Pzb2fC7s4WOoIgdORc
K5kckU0KhQFTE6SikJXFJgM41Tco5+VqOsaG0qLrY1Q
-> ssh-ed25519 fw2Xhg +ohqts8dLFjvdHxrGHcOGxU0dm+V3N//giljHkobpDM
jR/UzGrfS9lrJ/VeolKLxfzeJAf2fIB2pdIn/6ukqNk
-> ssh-ed25519 tcumPQ 3DPkDPIQQSVtXSLzIRETsIyXQ0k1o18Evn6vf+l/6R8
bLXF62OmJjnOT1vvgq3+AcOKKSG5NonrK5EqCVc0Mwo
-> ssh-ed25519 JJ1LWg 2Wefc7eLolMU5InEmCNTq21Mf71mI0a2N1HgDrlHvy4
qXFW9CQBnrzubZ0mzS0Io2WGRrwGBkmeYndBTcZn/fM
-> ssh-ed25519 cDBabA oiH36AoIt/fFFYgnoxtH7OoetP+2/wjtn8qo3RJDSHc
qKmkxy1aZGP4ZwC0iH7n7hiJ0+rFQYvjQb5O1a1Z0r4
-> ssh-ed25519 cK5kHw bX3RtO5StMejUYWAaA37fjHA5nO7Xs1vWDQk3yOjs2o
Egxmcf8FKAd+E5hMLmhV1yQsCo5rJyUazf1szOvpTAM
-> ssh-ed25519 CAWG4Q oKqqRDJH0w8lsoQBQk0w8PO+z5gFNmSaGBUSumvDp1I
m1zWp9MfViAmtpbJhqOHraIokDaPKb0DvvO4vAGCTWI
-> ssh-ed25519 xA739A G26kPOz6sbFATs+KAr7gbDvji13eA1smFusQAOJXMwA
Sppvz7A103kZoNxoGsd6eXeCvVh7mBE2MRwLFj9O1dY
-> ssh-ed25519 MSF3dg 55ekNcp+inbUd+GQ/VZ7BoBASaJ8YDqF74CVXy1PUxQ
aTHLLAbzQPWWld/OT3BKebc6FcmsqMTaWCPBGm1UHic
--- mVkAMnI9XQhS3fMiFuuXP/yLR9wEG9+Rr8pA4Uc0avY
<04>DU <20><>s<EFBFBD><73><EFBFBD><EFBFBD>j<EFBFBD><6A>M<EFBFBD><4D>$<24>[<5B>M<EFBFBD><4D><EFBFBD><03>[_<>K7s<37>ju<>v<EFBFBD>D<EFBFBD>4<EFBFBD>g<EFBFBD><67>܄3<>Gn<47><6E><EFBFBD> ɽ<>P<EFBFBD>7~rZs<><73>
-> ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU
J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs
-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE
4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk
-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4
Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs
-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg
LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo
-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0
ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8
-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0
T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E
-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc
MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok
-> ~8m;7f-grease
lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/
G/X4EA
--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA
<EFBFBD>N <09>$[H<><48>Q<EFBFBD><51><EFBFBD>
d<EFBFBD><EFBFBD><EFBFBD>'<27><><EFBFBD>7<EFBFBD><1F>Ͳ)<29><><EFBFBD><17>x9y<39><79><EFBFBD>E<04><><EFBFBD>M7^<5E>[<5B>M<EFBFBD>+<2B>&<26><><EFBFBD><0E>$8tM<74>в

View File

@@ -1,13 +1,9 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>% <09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG <0B><><EFBFBD>
-> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA
Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs
-> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8
7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw
-> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I
GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU
--- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8
1<12>Mw4<77><34> <09>:H<>@<40>/<2F>gLtM<74>,<2C>ƥ<>*<2A><>z<EFBFBD>NV5<56>m<EFBFBD><6D>N<EFBFBD>o<EFBFBD><6F>j1 $<24>T<EFBFBD>G_<47>E{<7B>%<25><><17><>H<EFBFBD><EFBFBD>A<EFBFBD>p<EFBFBD>

Binary file not shown.

View File

@@ -1,13 +1,9 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk
6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM
-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY
6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M
-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc
mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY
-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M
KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc
-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc
wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U
--- 1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>R<EFBFBD>@<40>/i<>I'<27><><EFBFBD>Nx<4E>r"<1D>`<1E>O<EFBFBD><4F><EFBFBD>y<><79>8<EFBFBD><38> \/<2F><>I<19><17>D<EFBFBD>`<60>ߓ<EFBFBD><DF93><EFBFBD><1E><04>uy<75><79><EFBFBD>:9Lt<4C><1D><><EFBFBD>؋<EFBFBD><D88B><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AU<41><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>`<60>;<3B>q8<71>GLU#<23>i<EFBFBD>y<EFBFBD><79>i<03>ڜ
-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ
jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY
-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU
8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg
-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA
SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8
--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18
<EFBFBD><EFBFBD><EFBFBD><01>‡%<25>]yi"<22><><EFBFBD>L<EFBFBD> <0B><>H`<60>a$<24><>)<29>9ve<76>.0<EFBFBD>m<EFBFBD>K<EFBFBD>v<EFBFBD><EFBFBD> <0B>u"|1c<31>-%<25><>"<22>WF<12><><EFBFBD>A<EFBFBD><41>h<EFBFBD>$<05><>j<e<><65>x<EFBFBD>Lx<4C><78>.?<3F><><EFBFBD>:L<><4C><EFBFBD><EFBFBD>,<2C>u<EFBFBD>|<7C><>F|<7C>i<EFBFBD><69><EFBFBD>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -2,11 +2,6 @@ let
keys = import ../keys.nix;
adminsKeys = builtins.attrValues keys.admins;
hut = [ keys.hosts.hut ] ++ adminsKeys;
fox = [ keys.hosts.fox ] ++ adminsKeys;
apex = [ keys.hosts.apex ] ++ adminsKeys;
raccoon = [ keys.hosts.raccoon ] ++ adminsKeys;
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
tent = [ keys.hosts.tent ] ++ adminsKeys;
# Only expose ceph keys to safe nodes and admins
safe = keys.hostGroup.safe ++ adminsKeys;
in
@@ -14,21 +9,9 @@ in
"gitea-runner-token.age".publicKeys = hut;
"gitlab-runner-docker-token.age".publicKeys = hut;
"gitlab-runner-shell-token.age".publicKeys = hut;
"gitlab-bsc-docker-token.age".publicKeys = hut;
"nix-serve.age".publicKeys = mon;
"jungle-robot-password.age".publicKeys = mon;
"ipmi.yml.age".publicKeys = mon;
"tent-gitlab-runner-pm-docker-token.age".publicKeys = tent;
"tent-gitlab-runner-pm-shell-token.age".publicKeys = tent;
"tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent;
"vpn-dac-login.age".publicKeys = tent;
"vpn-dac-client-key.age".publicKeys = tent;
"nix-serve.age".publicKeys = hut;
"jungle-robot-password.age".publicKeys = hut;
"ceph-user.age".publicKeys = safe;
"munge-key.age".publicKeys = safe;
"wg-fox.age".publicKeys = fox;
"wg-apex.age".publicKeys = apex;
"wg-raccoon.age".publicKeys = raccoon;
}

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0
CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ
-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4
JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4
-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg
IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc
-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg
fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng
-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0
pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE
--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8
<EFBFBD>\ֺ"^<5E>DT<44>H<EFBFBD><48>3<EFBFBD><33><EFBFBD>_|.h<0E><><EFBFBD><EFBFBD><03>^<5E>n<14><0E><><EFBFBD><EFBFBD><1A>g<EFBFBD>S<EFBFBD>]_<><5F>?n<>z~2<>!<21>p7<70><37><<3C><14>ʨD?<3F>~<02>F<EFBFBD>$<24>`<60>q+<2B><><EFBFBD>SW<53>(+<2B><>P<EFBFBD>c<1E>u[<5B>m<EFBFBD>`O<>ܛ<EFBFBD>ϖT

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM
vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE
-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI
/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng
-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE
JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho
-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA
msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ
-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec
SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE
--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o
<EFBFBD><EFBFBD>f#<23>,|<7C>Ey.v<>DL<44>Ӻ<05>JPX<50><07><>`<60><><EFBFBD><EFBFBD>-#<23>F<EFBFBD>Ubs<62>(Q!?<3F><1A>#xJG?5<><35><EFBFBD><EFBFBD><EFBFBD>~<7E><>6MA<15> U<><55><EFBFBD>C<01><>M<>$+}W<>NϨG!<21><><EFBFBD><EFBFBD>a<EFBFBD><61><EFBFBD><EFBFBD>%<25>ǽ<EFBFBD>G

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More