Compare commits
10 Commits
3380ec5e05
...
00fe0f46a1
Author | SHA1 | Date | |
---|---|---|---|
00fe0f46a1 | |||
79940876c3 | |||
163d19bd05 | |||
360f67cfab | |||
a402bc880c | |||
c441178910 | |||
9c3fbc0ec9 | |||
3f8e6b9fcd | |||
08e4dda6d2 | |||
26a4a26ce0 |
25
keys.nix
25
keys.nix
@ -2,21 +2,22 @@
|
|||||||
# here all the public keys
|
# here all the public keys
|
||||||
rec {
|
rec {
|
||||||
hosts = {
|
hosts = {
|
||||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||||
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
|
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
|
||||||
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
|
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
|
||||||
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
|
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
|
||||||
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
|
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
|
||||||
|
raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon";
|
||||||
};
|
};
|
||||||
|
|
||||||
hostGroup = with hosts; rec {
|
hostGroup = with hosts; rec {
|
||||||
compute = [ owl1 owl2 fox ];
|
compute = [ owl1 owl2 fox raccoon ];
|
||||||
playground = [ eudy koro weasel ];
|
playground = [ eudy koro weasel ];
|
||||||
storage = [ bay lake2 ];
|
storage = [ bay lake2 ];
|
||||||
monitor = [ hut ];
|
monitor = [ hut ];
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
../common/xeon.nix
|
../common/xeon.nix
|
||||||
../common/ssf/hosts.nix
|
../common/ssf/hosts.nix
|
||||||
../module/ceph.nix
|
../module/ceph.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
../module/slurm-server.nix
|
../module/slurm-server.nix
|
||||||
./nfs.nix
|
./nfs.nix
|
||||||
./wireguard.nix
|
./wireguard.nix
|
||||||
@ -56,17 +57,6 @@
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# Use SSH tunnel to reach internal hosts
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host bscpm04.bsc.es gitlab-internal.bsc.es knights3.bsc.es
|
|
||||||
ProxyCommand nc -X connect -x localhost:23080 %h %p
|
|
||||||
Host raccoon
|
|
||||||
HostName knights3.bsc.es
|
|
||||||
ProxyCommand nc -X connect -x localhost:23080 %h %p
|
|
||||||
Host tent
|
|
||||||
ProxyJump raccoon
|
|
||||||
'';
|
|
||||||
|
|
||||||
networking.firewall = {
|
networking.firewall = {
|
||||||
extraCommands = ''
|
extraCommands = ''
|
||||||
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
|
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
|
||||||
@ -76,10 +66,4 @@
|
|||||||
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
|
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# Use tent for cache
|
|
||||||
nix.settings = {
|
|
||||||
extra-substituters = [ "https://jungle.bsc.es/cache" ];
|
|
||||||
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
@ -25,11 +25,18 @@
|
|||||||
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
|
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
|
||||||
persistentKeepalive = 25;
|
persistentKeepalive = 25;
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
name = "raccoon";
|
||||||
|
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||||
|
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||||
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
networking.hosts = {
|
networking.hosts = {
|
||||||
"10.106.0.1" = [ "fox" ];
|
"10.106.0.1" = [ "fox" ];
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/ssf.nix
|
../common/ssf.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
../module/monitoring.nix
|
../module/monitoring.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -15,8 +15,9 @@
|
|||||||
|
|
||||||
hosts = {
|
hosts = {
|
||||||
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
||||||
"84.88.51.152" = [ "raccoon" ];
|
|
||||||
"84.88.51.142" = [ "raccoon-ipmi" ];
|
"84.88.51.142" = [ "raccoon-ipmi" ];
|
||||||
|
"192.168.11.12" = [ "bscpm04.bsc.es" ];
|
||||||
|
"192.168.11.15" = [ "gitlab-internal.bsc.es" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -156,18 +156,30 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
csiringo = {
|
csiringo = {
|
||||||
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
|
|
||||||
uid = 9653;
|
uid = 9653;
|
||||||
isNormalUser = true;
|
isNormalUser = true;
|
||||||
home = "/home/Computational/csiringo";
|
home = "/home/Computational/csiringo";
|
||||||
description = "Cesare Siringo";
|
description = "Cesare Siringo";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "apex" "weasel" ];
|
hosts = [ ];
|
||||||
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
|
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
acinca = {
|
||||||
|
uid = 9654;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/acinca";
|
||||||
|
description = "Arnau Cinca";
|
||||||
|
group = "Computational";
|
||||||
|
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
|
||||||
|
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
|
||||||
|
];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
groups = {
|
groups = {
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
./xeon.nix
|
./xeon.nix
|
||||||
./ssf/fs.nix
|
./ssf/fs.nix
|
||||||
./ssf/hosts.nix
|
./ssf/hosts.nix
|
||||||
|
./ssf/hosts-remote.nix
|
||||||
./ssf/net.nix
|
./ssf/net.nix
|
||||||
./ssf/ssh.nix
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
9
m/common/ssf/hosts-remote.nix
Normal file
9
m/common/ssf/hosts-remote.nix
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
{ pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.hosts = {
|
||||||
|
# Remote hosts visible from compute nodes
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
|
};
|
||||||
|
}
|
@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
# Use SSH tunnel to apex to reach internal hosts
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host tent
|
|
||||||
ProxyJump raccoon
|
|
||||||
|
|
||||||
# Access raccoon via the HTTP proxy
|
|
||||||
Host raccoon knights3.bsc.es
|
|
||||||
HostName knights3.bsc.es
|
|
||||||
ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
|
|
||||||
|
|
||||||
# Make sure we can reach gitlab even if we don't have SSH access to raccoon
|
|
||||||
Host bscpm04.bsc.es gitlab-internal.bsc.es
|
|
||||||
ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
|
|
||||||
'';
|
|
||||||
}
|
|
@ -9,6 +9,7 @@
|
|||||||
./cpufreq.nix
|
./cpufreq.nix
|
||||||
./fs.nix
|
./fs.nix
|
||||||
./users.nix
|
./users.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
../module/debuginfod.nix
|
../module/debuginfod.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/nvidia.nix
|
../module/nvidia.nix
|
||||||
../module/slurm-client.nix
|
../module/slurm-client.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
./wireguard.nix
|
./wireguard.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -45,16 +46,6 @@
|
|||||||
|
|
||||||
services.fail2ban.enable = true;
|
services.fail2ban.enable = true;
|
||||||
|
|
||||||
# Use SSH tunnel to reach internal hosts
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host bscpm04.bsc.es gitlab-internal.bsc.es tent
|
|
||||||
ProxyJump raccoon
|
|
||||||
Host raccoon
|
|
||||||
ProxyJump apex
|
|
||||||
HostName 127.0.0.1
|
|
||||||
Port 22022
|
|
||||||
'';
|
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
|
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
|
||||||
hostName = "fox";
|
hostName = "fox";
|
||||||
@ -72,12 +63,6 @@
|
|||||||
interfaces.enp1s0f0np0.useDHCP = true;
|
interfaces.enp1s0f0np0.useDHCP = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Use hut for cache
|
|
||||||
nix.settings = {
|
|
||||||
extra-substituters = [ "https://jungle.bsc.es/cache" ];
|
|
||||||
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Recommended for new graphics cards
|
# Recommended for new graphics cards
|
||||||
hardware.nvidia.open = true;
|
hardware.nvidia.open = true;
|
||||||
|
|
||||||
|
@ -29,12 +29,19 @@
|
|||||||
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
|
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
|
||||||
allowedIPs = [ "10.106.0.30/32" ];
|
allowedIPs = [ "10.106.0.30/32" ];
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
name = "raccoon";
|
||||||
|
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||||
|
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||||
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
networking.hosts = {
|
networking.hosts = {
|
||||||
"10.106.0.30" = [ "apex" ];
|
"10.106.0.30" = [ "apex" ];
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
networking.firewall = {
|
networking.firewall = {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
imports = [
|
imports = [
|
||||||
../common/ssf.nix
|
../common/ssf.nix
|
||||||
../module/monitoring.nix
|
../module/monitoring.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
||||||
|
@ -6,5 +6,8 @@
|
|||||||
{
|
{
|
||||||
extra-substituters = [ "http://hut/cache" ];
|
extra-substituters = [ "http://hut/cache" ];
|
||||||
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||||
|
|
||||||
|
# Set a low timeout in case hut is down
|
||||||
|
connect-timeout = 3; # seconds
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,12 @@
|
|||||||
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
||||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||||
KillMode = lib.mkForce "control-group";
|
KillMode = lib.mkForce "control-group";
|
||||||
|
|
||||||
|
# If slurmd fails to contact the control server it will fail, causing the
|
||||||
|
# node to remain out of service until manually restarted. Always try to
|
||||||
|
# restart it.
|
||||||
|
Restart = "always";
|
||||||
|
RestartSec = "30s";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.slurm.client.enable = true;
|
services.slurm.client.enable = true;
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host apex ssfhead
|
|
||||||
HostName ssflogin.bsc.es
|
|
||||||
Host hut
|
|
||||||
ProxyJump apex
|
|
||||||
'';
|
|
||||||
}
|
|
@ -3,11 +3,13 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/base.nix
|
../common/base.nix
|
||||||
|
../common/ssf/hosts.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/debuginfod.nix
|
../module/debuginfod.nix
|
||||||
../module/ssh-hut-extern.nix
|
|
||||||
../module/nvidia.nix
|
../module/nvidia.nix
|
||||||
../eudy/kernel/perf.nix
|
../eudy/kernel/perf.nix
|
||||||
|
./wireguard.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Don't install Grub on the disk yet
|
# Don't install Grub on the disk yet
|
||||||
@ -43,9 +45,11 @@
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
nix.settings = {
|
# Mount the NFS home
|
||||||
extra-substituters = [ "https://jungle.bsc.es/cache" ];
|
fileSystems."/nfs/home" = {
|
||||||
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
device = "10.106.0.30:/home";
|
||||||
|
fsType = "nfs";
|
||||||
|
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
# Enable performance governor
|
# Enable performance governor
|
||||||
|
48
m/raccoon/wireguard.nix
Normal file
48
m/raccoon/wireguard.nix
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.nat = {
|
||||||
|
enable = true;
|
||||||
|
enableIPv6 = false;
|
||||||
|
externalInterface = "eno0";
|
||||||
|
internalInterfaces = [ "wg0" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall = {
|
||||||
|
allowedUDPPorts = [ 666 ];
|
||||||
|
};
|
||||||
|
|
||||||
|
age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age;
|
||||||
|
|
||||||
|
# Enable WireGuard
|
||||||
|
networking.wireguard.enable = true;
|
||||||
|
networking.wireguard.interfaces = {
|
||||||
|
wg0 = {
|
||||||
|
ips = [ "10.106.0.236/24" ];
|
||||||
|
listenPort = 666;
|
||||||
|
privateKeyFile = config.age.secrets.wgRaccoon.path;
|
||||||
|
# Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
name = "fox";
|
||||||
|
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
|
||||||
|
allowedIPs = [ "10.106.0.1/32" ];
|
||||||
|
endpoint = "fox.ac.upc.edu:666";
|
||||||
|
persistentKeepalive = 25;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "apex";
|
||||||
|
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
|
||||||
|
allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ];
|
||||||
|
endpoint = "ssfhead.bsc.es:666";
|
||||||
|
persistentKeepalive = 25;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hosts = {
|
||||||
|
"10.106.0.1" = [ "fox.wg" ];
|
||||||
|
"10.106.0.30" = [ "apex.wg" ];
|
||||||
|
};
|
||||||
|
}
|
@ -3,9 +3,9 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/xeon.nix
|
||||||
|
../common/ssf/hosts.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/debuginfod.nix
|
../module/debuginfod.nix
|
||||||
../module/ssh-hut-extern.nix
|
|
||||||
./monitoring.nix
|
./monitoring.nix
|
||||||
./nginx.nix
|
./nginx.nix
|
||||||
./nix-serve.nix
|
./nix-serve.nix
|
||||||
@ -15,6 +15,7 @@
|
|||||||
../hut/msmtp.nix
|
../hut/msmtp.nix
|
||||||
../module/p.nix
|
../module/p.nix
|
||||||
../module/vpn-dac.nix
|
../module/vpn-dac.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select this using the ID to avoid mismatches
|
# Select this using the ID to avoid mismatches
|
||||||
@ -35,6 +36,7 @@
|
|||||||
defaultGateway = "10.0.44.1";
|
defaultGateway = "10.0.44.1";
|
||||||
hosts = {
|
hosts = {
|
||||||
"84.88.53.236" = [ "apex" ];
|
"84.88.53.236" = [ "apex" ];
|
||||||
|
"10.0.44.1" = [ "raccoon" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/ssf.nix
|
../common/ssf.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select this using the ID to avoid mismatches
|
# Select this using the ID to avoid mismatches
|
||||||
|
@ -4,6 +4,7 @@ let
|
|||||||
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
||||||
fox = [ keys.hosts.fox ] ++ adminsKeys;
|
fox = [ keys.hosts.fox ] ++ adminsKeys;
|
||||||
apex = [ keys.hosts.apex ] ++ adminsKeys;
|
apex = [ keys.hosts.apex ] ++ adminsKeys;
|
||||||
|
raccoon = [ keys.hosts.raccoon ] ++ adminsKeys;
|
||||||
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
|
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
|
||||||
tent = [ keys.hosts.tent ] ++ adminsKeys;
|
tent = [ keys.hosts.tent ] ++ adminsKeys;
|
||||||
# Only expose ceph keys to safe nodes and admins
|
# Only expose ceph keys to safe nodes and admins
|
||||||
@ -29,4 +30,5 @@ in
|
|||||||
|
|
||||||
"wg-fox.age".publicKeys = fox;
|
"wg-fox.age".publicKeys = fox;
|
||||||
"wg-apex.age".publicKeys = apex;
|
"wg-apex.age".publicKeys = apex;
|
||||||
|
"wg-raccoon.age".publicKeys = raccoon;
|
||||||
}
|
}
|
||||||
|
BIN
secrets/wg-raccoon.age
Normal file
BIN
secrets/wg-raccoon.age
Normal file
Binary file not shown.
49
web/content/posts/2025-09-26/_index.md
Normal file
49
web/content/posts/2025-09-26/_index.md
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
---
|
||||||
|
title: "Update 2025-09-26"
|
||||||
|
author: "Rodrigo Arias Mallo"
|
||||||
|
date: 2025-09-26
|
||||||
|
---
|
||||||
|
|
||||||
|
This is a summary of notable changes introduced in the last two years. We
|
||||||
|
continue to keep all machines updated to the latest NixOS release (currently
|
||||||
|
NixOS 25.05).
|
||||||
|
|
||||||
|
### New compute node: fox
|
||||||
|
|
||||||
|
We have a new [fox machine](/fox), with two AMD Genoa 9684X CPUs and two NVIDIA
|
||||||
|
RTX4000 GPUs. Over the last few months we have been running some tests and it seems
|
||||||
|
that most of the components work well. We have configured CUDA to use the NVIDIA
|
||||||
|
GPUs, as well as AMD uProf to trace performance and energy counters from the
|
||||||
|
CPUs.
|
||||||
|
|
||||||
|
### Upgraded login node: apex
|
||||||
|
|
||||||
|
We have upgraded the operating system on the login node to NixOS, which now runs
|
||||||
|
Linux 6.15.6. During the upgrade, we have detected a problem with the storage
|
||||||
|
disks. The `/` and `/home` partitions sit on a
|
||||||
|
[RAID 5](https://en.wikipedia.org/wiki/Standard_RAID_levels#RAID_5),
|
||||||
|
transparently handled by a RAID hardware controller which starts its own
|
||||||
|
firmware before passing control to the BIOS to continue the boot sequence. A
|
||||||
|
problem during the startup of the firmware prevented the node from even reaching the
|
||||||
|
BIOS screen.
|
||||||
|
|
||||||
|
After a long debugging session, we detected that the flash memory that stores
|
||||||
|
the firmware of the hardware controller was likely to be the issue, since
|
||||||
|
[memory cells](https://en.wikipedia.org/wiki/Flash_memory#Principles_of_operation)
|
||||||
|
may lose charge over time and can end up corrupting the content. We flashed
|
||||||
|
the latest firmware so the memory cells are charged again with the new bits and
|
||||||
|
that fixed the problem. Hopefully we will be able to use it for some more years.
|
||||||
|
|
||||||
|
The SLURM server has been moved to apex, which allows users to also submit jobs
|
||||||
|
to fox.
|
||||||
|
|
||||||
|
### Migrated machines to BSC building
|
||||||
|
|
||||||
|
The server room had a temperature issue that had been affecting our machines
|
||||||
|
since the end of February of 2025. As the summer approached, the temperature
|
||||||
|
exceeded the safe limits for our hardware, so we had to shut down the cluster.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Since then, we have moved the cluster to BSC premises, where it now rests at a
|
||||||
|
stable temperature, so hopefully we won't have any more unscheduled downtime.
|
BIN
web/content/posts/2025-09-26/temp.png
Normal file
BIN
web/content/posts/2025-09-26/temp.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 97 KiB |
Loading…
x
Reference in New Issue
Block a user