Add fox to SLURM #167

Manually merged
rarias merged 13 commits from fox-slurm into master 2025-09-03 12:14:38 +02:00
28 changed files with 214 additions and 66 deletions

View File

@@ -16,8 +16,7 @@ rec {
};
hostGroup = with hosts; rec {
untrusted = [ fox ];
compute = [ owl1 owl2 ];
compute = [ owl1 owl2 fox ];
playground = [ eudy koro weasel ];
storage = [ bay lake2 ];
monitor = [ hut ];
@@ -31,6 +30,7 @@ rec {
admins = {
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
};
}

View File

@@ -7,6 +7,7 @@
../module/ceph.nix
../module/slurm-server.nix
./nfs.nix
./wireguard.nix
];
# Don't install grub MBR for now

35
m/apex/wireguard.nix Normal file
View File

@@ -0,0 +1,35 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
ips = [ "10.106.0.30/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgApex.path;
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
peers = [
{
name = "Fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.0/24" ];
arocanon marked this conversation as resolved Outdated

Would it make sense to narrow it down to "10.100.0.1/32"?

If we were only planning on supporting one node, yes, but I may add other machines in the future, so I reserved the 256 IPs.
endpoint = "fox.ac.upc.edu:666";
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
persistentKeepalive = 25;
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox" ];
};
}

View File

@@ -14,7 +14,7 @@
nftables.enable = lib.mkForce false;
hosts = {
"84.88.53.236" = [ "apex" "ssfhead.bsc.es" "ssfhead" ];
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
"84.88.51.152" = [ "raccoon" ];
"84.88.51.142" = [ "raccoon-ipmi" ];
};

View File

@@ -6,6 +6,8 @@
../common/xeon/console.nix
../module/emulation.nix
../module/nvidia.nix
../module/slurm-client.nix
./wireguard.nix
];
# Don't turn off on August as UPC has different dates.
@@ -35,6 +37,8 @@
services.openssh.settings.X11Forwarding = true;
services.fail2ban.enable = true;
# Use SSH tunnel to reach internal hosts
programs.ssh.extraConfig = ''
Host bscpm04.bsc.es gitlab-internal.bsc.es tent
@@ -91,4 +95,20 @@
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

46
m/fox/wireguard.nix Normal file
View File

@@ -0,0 +1,46 @@
# WireGuard tunnel endpoint for fox, peered with apex.
{ config, ... }:

let
  # UDP port WireGuard listens on; must be reachable by the peer.
  wgPort = 666;
in
{
  # Private key for the tunnel, stored as an age secret.
  age.secrets.wgFox.file = ../../secrets/wg-fox.age;

  networking = {
    firewall = {
      # Let WireGuard traffic in.
      allowedUDPPorts = [ wgPort ];

      extraCommands = ''
        # Accept slurm connections to slurmd from apex (via wireguard)
        iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
      '';
    };

    wireguard = {
      enable = true;

      # The interface name ("wg0") is arbitrary; the firewall rule above
      # refers to it, so keep both in sync.
      interfaces.wg0 = {
        # Address and subnet of this end of the tunnel.
        ips = [ "10.106.0.1/24" ];
        listenPort = wgPort;
        privateKeyFile = config.age.secrets.wgFox.path;
        # Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=

        # Peers allowed to connect to this interface.
        peers = [
          {
            name = "Apex";
            publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
            # Tunnel addresses assigned to this peer; also used for routing.
            allowedIPs = [ "10.106.0.30/32" ];
          }
        ];
      };
    };

    # Resolve "apex" to its address inside the tunnel.
    hosts."10.106.0.30" = [ "apex" ];
  };
}

View File

@@ -31,10 +31,12 @@ in {
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
@@ -62,7 +64,7 @@ in {
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
#SuspendExcNodes=
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600

View File

@@ -13,6 +13,11 @@
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from compute nodes for srun
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
# Accept slurm connections to controller from fox (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from fox for srun (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
'';
};
}

View File

@@ -39,6 +39,7 @@
};
hosts = {
"10.0.44.4" = [ "tent" ];
"84.88.53.236" = [ "apex" ];
};
};

View File

@@ -33,6 +33,9 @@
nameservers = [ "84.88.52.35" "84.88.52.36" ];
search = [ "bsc.es" "ac.upc.edu" ];
defaultGateway = "10.0.44.1";
hosts = {
"84.88.53.236" = [ "apex" ];
};
};
services.p.enable = true;

Binary file not shown.

View File

@@ -1,11 +1,13 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg d7+nvfAcdC3GjJxipXFrsfGGyP5jAY+gRWRV+4FVYAM
CG7r0bRGgnUWcdfDnpe7HwZ3L/y7b5iuJuqvf15b3/Y
-> ssh-ed25519 CAWG4Q X0vITOErz4wkR3VQYOcVlnrkHtwe+ytdZz1Hcrs4vVs
6IWYOhXLQ+BnML9YfLLHJYEO2CZ/uEc9IBqhoWvjDHI
-> ssh-ed25519 xA739A p5e/0AJtZ0+zbRvkB/usLuxusY8xXRx9Ksi/LQlcIHw
M4S/qlzT9POyJx4gY9lmycstUcdwG2cinN4OlV22zzo
-> ssh-ed25519 MSF3dg Ydl7uBWzBx6sAaxbzC3x8qiaU3ysGqV4rUFLpHCEV30
/1AUHBhCNOs9i7LJbmzwQDHsu+ybzYf6+coztKk5E3U
--- kYt15WxClpT7PXD1oFe9GqJU+OswjH7y9wIc8/GzZ7M
<EFBFBD><EFBFBD>h<>ߓ<><DF93><EFBFBD>`<60><><EFBFBD>V4F<34><46>_k)^<5E>m$uj:ѳ<><D1B3><17><><EFBFBD>}<7D>Z]$U]<12>u<EFBFBD> <20>0<EFBFBD><30><EFBFBD>v8<76>?<3F>X<EFBFBD>P<EFBFBD>g%d<>#<23>d9{rAi<41><69>
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>% <09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG <0B><><EFBFBD>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -2,6 +2,8 @@ let
keys = import ../keys.nix;
adminsKeys = builtins.attrValues keys.admins;
hut = [ keys.hosts.hut ] ++ adminsKeys;
fox = [ keys.hosts.fox ] ++ adminsKeys;
apex = [ keys.hosts.apex ] ++ adminsKeys;
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
tent = [ keys.hosts.tent ] ++ adminsKeys;
# Only expose ceph keys to safe nodes and admins
@@ -24,4 +26,7 @@ in
"ceph-user.age".publicKeys = safe;
"munge-key.age".publicKeys = safe;
"wg-fox.age".publicKeys = fox;
"wg-apex.age".publicKeys = apex;
}

View File

@@ -1,11 +1,13 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w HlQ4V8lBd3im5j8KHEuQZBTuztvPj1QoWdv6FL6qzGI
Jpt91X1UIIVFQt1X6Q//kALn+Cetp/LqBZZvTuhFthw
-> ssh-ed25519 CAWG4Q StnngJAcuAwUnTrXDR3nJ2KFN0jNdTqSz+/1TfmWkzA
CR4AQ6fqaJVY1mdUIX1gzaZwRs1sU8F8hHztnkN8vN0
-> ssh-ed25519 xA739A xya5A5t63Owx+VrGgUfV/lIP8b/xV1cerMpuZBLaDVM
w+pA583yUnFq2AvGBGzWbQIGQEY9WqW0CSLQ9v+SG0c
-> ssh-ed25519 MSF3dg aXkLxCyYdOwVopHHmpXEI6WlAIizKdJi4IO0KEdhS3s
WKXkTszZN66+QZdSDJ4D9q7xgYWMfliOLCubIF2Dqkc
--- uVWoU2lMkqQ/9Z0BqKRCeUpsKi8lwmHukT/FV8wYMbg
<EFBFBD><EFBFBD>1G+<2B>6<EFBFBD><36>g[|x]2T<32>й<EFBFBD><D0B9><EFBFBD> <20>CKu)<29><><EFBFBD>]<5D><><38><D693><EFBFBD><EFBFBD>l<EFBFBD><6C>S<EFBFBD><53><EFBFBD>Q<EFBFBD><07><>x<EFBFBD><78><EFBFBD><EFBFBD>#7r<37>k{*<2A><>3ս~C<>b<EFBFBD><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڵ<EFBFBD>Np<1E><05>]J]h<>je+d%Е<>#<23>m<EFBFBD>?=6}<7D>
-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0
CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ
-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4
JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4
-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg
IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc
-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg
fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng
-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0
pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE
--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8
<EFBFBD>\ֺ"^<5E>DT<44>H<EFBFBD><48>3<EFBFBD><33><EFBFBD>_|.h<0E><><EFBFBD><EFBFBD><03>^<5E>n<14><0E><><EFBFBD><EFBFBD><1A>g<EFBFBD>S<EFBFBD>]_<><5F>?n<>z~2<>!<21>p7<70><37><<3C><14>ʨD?<3F>~<02>F<EFBFBD>$<24>`<60>q+<2B><><EFBFBD>SW<53>(+<2B><>P<EFBFBD>c<1E>u[<5B>m<EFBFBD>`O<>ܛ<EFBFBD>ϖT

View File

@@ -1,11 +1,13 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w sg9SmahxBg35MDIxhrp4oHkaTaxsKoVQju2eNhCt0BM
CZ64dEGqz2tbkG8KtimZvLUEMrQpVVBJP7Fu46WTMgc
-> ssh-ed25519 CAWG4Q jzS1R14W1CWxdziMLG/yCGPLWSkiyE+9lqyCVe491ng
acJo/nhKq3pSPoFEPaFLN1fzHHbEzstNoLtohWAHKiM
-> ssh-ed25519 xA739A qeGJoLeSIQwLU2Yg+Gi2bikHJ3HscLfyo1msqL3JwHw
tTwaxRBKTl/SoyY/LnxR/j/5WvCNX5VeZLKi018YMrY
-> ssh-ed25519 MSF3dg Wym7Uyf1XvH1H6mNDERkO8opkMiN0zzXm2PjXftEOWs
Uw8ZwwKIB5UqgVuoSLE2QajNDJZkH7/Y3Nsy+WFl7Xs
--- 94hGVbYiCGZdMEJesCMLh7IZi+w5l/Kr1lZJHQgrc0o
j5j磛<6A><04><>J<EFBFBD><4A><EFBFBD>a<EFBFBD>]<5D>a%dr<64><72>FDT<44><54>^<5E><>Q<EFBFBD>s/<2F>kwB<77>$<24><>$<24><>H<EFBFBD>'<27><><EFBFBD><EFBFBD><EFBFBD>w<14><?^|<7C><07>h$<24>ؗ<EFBFBD>GI<47>ĕsT2RU<52><55>*/O<>7<EFBFBD><37><EFBFBD>G<EFBFBD><70>4<EFBFBD><34><EFBFBD>M9<4D>j<><06>
-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM
vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE
-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI
/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng
-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE
JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho
-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA
msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ
-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec
SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE
--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o
<EFBFBD><EFBFBD>f#<23>,|<7C>Ey.v<>DL<44>Ӻ<05>JPX<50><07><>`<60><><EFBFBD><EFBFBD>-#<23>F<EFBFBD>Ubs<62>(Q!?<3F><1A>#xJG?5<><35><EFBFBD><EFBFBD><EFBFBD>~<7E><>6MA<15> U<><55><EFBFBD>C<01><>M<>$+}W<>NϨG!<21><><EFBFBD><EFBFBD>a<EFBFBD><61><EFBFBD><EFBFBD>%<25>ǽ<EFBFBD>G

View File

@@ -1,12 +1,13 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w 5K0mzfJGvAB2LGmoQ9ZLbWooVEX6F4+fQdo1JUoB3FM
AKGa507bUrYjXFaMQ1MXTDBFYsdS6zbs+flmxYN0UNo
-> ssh-ed25519 CAWG4Q 8KzLc949on8iN1pK8q11OpCIeO71t6b0zxCLHhcQ6ns
uy7z6RdIuoUes+Uap3k5eoFFuu/DcSrEBwq4V4C/ygc
-> ssh-ed25519 xA739A SLx5cKo0fdAHj+cLpJ4FYTWTUTyDsCqKQOufDu3xnGo
VnS/WsiSaf6RpXuhgfij4pYu4p9hlJl1oXrfYY9rKlQ
-> ssh-ed25519 MSF3dg c5ZXvdNxNfZU3HeWsttuhy+UC5JxWN/IFuCuCGbksn4
vcKlIirf+VvERX71YpmwW6zp6ClhlG2PR4R8LIN7cQo
--- pJKICDaYAlxqNnvHIuzB3Yk7tv0ZNYflGTQD+Zk/8+4
<EFBFBD>h/\J<>J
<EFBFBD>0?<3F> <20>p<EFBFBD><70><EFBFBD>@܉7<DC89><37>3<EFBFBD><33><EFBFBD><EFBFBD>z<EFBFBD><7A><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>a<EFBFBD><61>'<27>,ka<6B>I<EFBFBD>XXOZ<4F>I\<5C><><EFBFBD><EFBFBD><EFBFBD> <09>BP<42><50>/cUɿ~B<><42>S' Q<><51><EFBFBD><EFBFBD>f<06><><EFBFBD>er<65><72><EFBFBD><EFBFBD>^<5E><><EFBFBD><EFBFBD>8l<38><6C>V<EFBFBD>E<EFBFBD><45><EFBFBD>
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>

Binary file not shown.

View File

@@ -1,12 +1,14 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w /RF8uZ/KahUqjEFILbF3+Jin+U0SQdoQChcc9RJ9axc
aEmPk++86nBR6d2BIa/oaUdyiLS6cH8TUoYJE3bxba4
-> ssh-ed25519 CAWG4Q qHyh9nQi8c3z/KHby9y5vhzN0Dwz0zca98ebjJmXrzs
ZbmwNzrSSQ3RvskE8SqcBa0vMy8pzm/HPGHLm5zuPGQ
-> ssh-ed25519 xA739A FlGbfS4bUxA3gVDzb3yPjp4hV8a7aiNBLUctnN3bGEY
3fI6SyVjVhh2M8uc/XV3blpdQMPMYi2qzaHNXvx0bvM
-> ssh-ed25519 MSF3dg 0Bs/aW0nNISS+93It75o6hKZWa7S+LF5bF5ApsJ2fQ8
y7o0KYDHEen13ndIxg/mYil3eMxxzvYF2pWqhMb+rBU
--- Iqo75G4+02Y9nc1OOkcEx+iQlKnGYCekAx76tRH53wA
<10>
<EFBFBD>X<EFBFBD><EFBFBD>%f <0C><><12>hX <0B><>R<>c<EFBFBD>+z<><7A>eg<65>& <20>d<EFBFBD><64><EFBFBD>ק<06><>A<EFBFBD><41><EFBFBD>чXM<58>1<EFBFBD>
-> ssh-ed25519 G5LX5w SRJhNenoQXbT1FgX3TMPnVH5P6oe2eHot+M1YsEjsEk
hfTSLgKi98Eh7JK5o7x2POpTEtQlQCpEa3keUFYCuME
-> ssh-ed25519 cK5kHw z5TwWJTkvx7HztjXHJW/aCOtOfPrQaLP0gyIT7rXcyU
b4NCpHfasgvkLLr+6LcWUl60p59aSNnfp3bl2OFYXo0
-> ssh-ed25519 CAWG4Q 4VpS1/OnFe8nxcQbRTKNhjsh/ZQ5cbhSMXwK/jjQ+3o
WF9wvOkqVml4UcEzyzeumKuUwCwwr2zvKLMg+PCB8nk
-> ssh-ed25519 xA739A 67FhuJ070jBVMt/xbKHWhfri6iIm0FyaFvzQabsvFBM
1G5/913dDv/r/6p1x/c5YiUnZzrX/LvIj33KW+PN0KU
-> ssh-ed25519 MSF3dg Bj/yB4N2wkyHCHC22tcjjJAA4ebSamN0Z4UVX3ZnryI
6D/ZgTs+j+MGDAbPU5zyK0i9zN6tQy68IcOnQZ27mYg
--- 169erk3ICSYLs4FPEuXCn7QlekWhsmSn0Lr+/R14I5Q
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><05>ҽ3<D2BD>s<EFBFBD>
w<EFBFBD><EFBFBD>4D<EFBFBD><EFBFBD>b.<2E><><EFBFBD>"|<7C><><EFBFBD>)"<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>;<3B>.<2E>ɫ7)<29>LeC<05>=S؟

BIN
secrets/wg-apex.age Normal file

Binary file not shown.

14
secrets/wg-fox.age Normal file
View File

@@ -0,0 +1,14 @@
age-encryption.org/v1
-> ssh-ed25519 cDBabA heyW9/cxgwFX9IexQIXjAQDWGQPNcMXcArQp2Rxsqx4
o9MQ7EH8PDDjsJdpH9F3Xq2zUoaDAJQlfFmYucSFs6Y
-> ssh-ed25519 cK5kHw Sza4pos7K3qW3omEeyidI/jszJNf9smemSZnUJfCIww
D6vazXki7hIYraIuSiGPS+FPbkFUwHhHWDf52OhEIMg
-> ssh-ed25519 CAWG4Q YexIHueOIMmIN8JIDyNUOKBkyz/k18HqV3hTXh48KlM
xh8UJzzWT6ByN+Dpn4JrMNsjGC/uc/v6LynwjBDz9NQ
-> ssh-ed25519 xA739A KySG3TXdqfCMUkVEDGa74B0op745s3XGYxFLyAXSQAc
5EI/yb5ctW9Qu18bHm3/sK97kwGcKzzmWvPSCWm89XA
-> ssh-ed25519 MSF3dg MNxnNj0fHmri8ophexXPNjRUBUWrzcuk5S1mucxUMTE
GVFWXtISEU8ZmlwL4nh4weAgfGrt2GHX0DTzbpS6zg8
--- UdrqkYG2ZApAuwdZeNhC50NP2rkD/Ol6y8nJa4RHx7Y
<EFBFBD>ܻ<EFBFBD>m(<28><><EFBFBD>><3E>H<48>Y87<><37>G<0F>+*<12><><EFBFBD><EFBFBD>9V<>.<2E><><EFBFBD><EFBFBD><03><><EFBFBD>p<EFBFBD>Oo<4F>=+哇<>P0<50><30>{<7B>)<29><17><><EFBFBD><EFBFBD>><3E>z3P^
u

View File

@@ -21,17 +21,28 @@ the detailed specifications:
## Access
To access the machine, request a SLURM session from [hut](/hut) using the `fox`
partition:
To access the machine, request a SLURM session from [apex](/apex) using the `fox`
partition. If you need the machine for performance measurements, use an
exclusive reservation:
hut% salloc -p fox
apex% salloc -p fox --exclusive
Then connect via ssh:
Otherwise, specify the CPUs that you need so other users can also use the node
at the same time:
hut% ssh fox
apex% salloc -p fox -c 8
Then use srun to execute an interactive shell:
apex% srun --pty $SHELL
fox%
Follow [these steps](/access) if you don't have access to hut or fox.
Make sure you get all CPUs you expect:
fox% grep Cpus_allowed_list /proc/self/status
Cpus_allowed_list: 0-191
Follow [these steps](/access) if you don't have access to apex or fox.
## CUDA
@@ -89,9 +100,5 @@ Then just run `nix develop` from the same directory:
The machine has several file systems available.
- `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity.
Don't abuse.
- `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high
capacity. Stores three redundant copies of every file.
- `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity.
- `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot.