11 Commits

Author SHA1 Message Date
b58370cf83 Add quickstart guide 2025-02-14 16:23:15 +01:00
581efb4312 Reject SSH connections without SLURM allocation 2025-02-13 22:13:23 +01:00
c32c1bd03b Add users to fox 2025-02-12 16:46:56 +01:00
1ddc5b7248 Add dalvare1 user 2025-02-12 16:39:51 +01:00
8968deb4db Add fox page in jungle website 2025-02-12 16:39:33 +01:00
5a21baf2be Mount NVME disks in /nvme{0,1} 2025-02-12 15:49:55 +01:00
f4534e1e5a Exclude fox from being suspended by slurm 2025-02-12 15:02:18 +01:00
d6ed4b4521 Use IPMI host names instead of IP addresses 2025-02-12 12:35:46 +01:00
049ad4d062 Add fox IPMI monitoring
Use agenix to store the credentials safely.
2025-02-12 12:10:45 +01:00
07ab4018d8 Add new fox machine 2025-02-11 21:55:49 +01:00
a1135306ed Add new GitLab runner for gitlab.bsc.es
It uses docker based on alpine and the host nix store, so we can perform
builds but isolate them from the system.
2025-01-28 12:58:44 +01:00
98 changed files with 593 additions and 2846 deletions

1
.gitattributes vendored
View File

@@ -1 +0,0 @@
*.pdf filter=lfs diff=lfs merge=lfs -text

Binary file not shown.

Binary file not shown.

34
flake.lock generated
View File

@@ -10,11 +10,11 @@
"systems": "systems" "systems": "systems"
}, },
"locked": { "locked": {
"lastModified": 1750173260, "lastModified": 1723293904,
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=", "narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=",
"owner": "ryantm", "owner": "ryantm",
"repo": "agenix", "repo": "agenix",
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf", "rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -30,11 +30,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1749650500, "lastModified": 1732868163,
"narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=", "narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=",
"ref": "refs/heads/master", "ref": "refs/heads/master",
"rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405", "rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f",
"revCount": 961, "revCount": 952,
"type": "git", "type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs" "url": "https://git.sr.ht/~rodarima/bscpkgs"
}, },
@@ -51,11 +51,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1744478979, "lastModified": 1700795494,
"narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=", "narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
"owner": "lnl7", "owner": "lnl7",
"repo": "nix-darwin", "repo": "nix-darwin",
"rev": "43975d782b418ebf4969e9ccba82466728c2851b", "rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -73,11 +73,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1745494811, "lastModified": 1703113217,
"narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=", "narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
"owner": "nix-community", "owner": "nix-community",
"repo": "home-manager", "repo": "home-manager",
"rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be", "rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -88,16 +88,16 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1752436162, "lastModified": 1736867362,
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=", "narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8", "rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "NixOS", "owner": "NixOS",
"ref": "nixos-25.05", "ref": "nixos-24.11",
"repo": "nixpkgs", "repo": "nixpkgs",
"type": "github" "type": "github"
} }

View File

@@ -1,11 +1,10 @@
{ {
inputs = { inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
agenix.url = "github:ryantm/agenix"; agenix.url = "github:ryantm/agenix";
agenix.inputs.nixpkgs.follows = "nixpkgs"; agenix.inputs.nixpkgs.follows = "nixpkgs";
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
self.lfs = false;
}; };
outputs = { self, nixpkgs, agenix, bscpkgs, ... }: outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
@@ -19,7 +18,6 @@ in
{ {
nixosConfigurations = { nixosConfigurations = {
hut = mkConf "hut"; hut = mkConf "hut";
tent = mkConf "tent";
owl1 = mkConf "owl1"; owl1 = mkConf "owl1";
owl2 = mkConf "owl2"; owl2 = mkConf "owl2";
eudy = mkConf "eudy"; eudy = mkConf "eudy";
@@ -28,8 +26,6 @@ in
lake2 = mkConf "lake2"; lake2 = mkConf "lake2";
raccoon = mkConf "raccoon"; raccoon = mkConf "raccoon";
fox = mkConf "fox"; fox = mkConf "fox";
apex = mkConf "apex";
weasel = mkConf "weasel";
}; };
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {

View File

@@ -9,28 +9,22 @@ rec {
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox"; fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox";
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
}; };
hostGroup = with hosts; rec { hostGroup = with hosts; rec {
compute = [ owl1 owl2 fox ]; compute = [ owl1 owl2 fox ];
playground = [ eudy koro weasel ]; playground = [ eudy koro ];
storage = [ bay lake2 ]; storage = [ bay lake2 ];
monitor = [ hut ]; monitor = [ hut ];
login = [ apex ];
system = storage ++ monitor ++ login; system = storage ++ monitor;
safe = system ++ compute; safe = system ++ compute;
all = safe ++ playground; all = safe ++ playground;
}; };
admins = { admins = {
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
}; };
} }

View File

@@ -1,85 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/xeon.nix
../common/ssf/hosts.nix
../module/ceph.nix
../module/slurm-server.nix
./nfs.nix
./wireguard.nix
];
# Don't install grub MBR for now
boot.loader.grub.device = "nodev";
boot.initrd.kernelModules = [
"megaraid_sas" # For HW RAID
];
environment.systemPackages = with pkgs; [
storcli # To manage HW RAID
];
fileSystems."/home" = {
device = "/dev/disk/by-label/home";
fsType = "ext4";
};
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
networking = {
hostName = "apex";
defaultGateway = "84.88.53.233";
nameservers = [ "8.8.8.8" ];
# Public facing interface
interfaces.eno1.ipv4.addresses = [ {
address = "84.88.53.236";
prefixLength = 29;
} ];
# Internal LAN to our Ethernet switch
interfaces.eno2.ipv4.addresses = [ {
address = "10.0.40.30";
prefixLength = 24;
} ];
# Infiniband over Omnipath switch (disconnected for now)
# interfaces.ibp5s0 = {};
nat = {
enable = true;
internalInterfaces = [ "eno2" ];
externalInterface = "eno1";
};
};
# Use SSH tunnel to reach internal hosts
programs.ssh.extraConfig = ''
Host bscpm04.bsc.es gitlab-internal.bsc.es knights3.bsc.es
ProxyCommand nc -X connect -x localhost:23080 %h %p
Host raccoon
HostName knights3.bsc.es
ProxyCommand nc -X connect -x localhost:23080 %h %p
Host tent
ProxyJump raccoon
'';
networking.firewall = {
extraCommands = ''
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
# logs. Insert as first position so we also protect SSH.
iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
# Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
'';
};
# Use tent for cache
nix.settings = {
extra-substituters = [ "https://jungle.bsc.es/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
}

View File

@@ -1,48 +0,0 @@
{ ... }:
{
services.nfs.server = {
enable = true;
lockdPort = 4001;
mountdPort = 4002;
statdPort = 4000;
exports = ''
/home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
/home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
'';
};
networking.firewall = {
# Check with `rpcinfo -p`
extraCommands = ''
# Accept NFS traffic from compute nodes but not from the outside
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Accept NFS traffic from wg0
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
'';
};
}

View File

@@ -1,35 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
ips = [ "10.106.0.30/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgApex.path;
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
peers = [
{
name = "Fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.0/24" ];
endpoint = "fox.ac.upc.edu:666";
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
persistentKeepalive = 25;
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox" ];
};
}

View File

@@ -2,7 +2,7 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
../module/monitoring.nix ../module/monitoring.nix
]; ];

View File

@@ -3,7 +3,6 @@
# Includes the basic configuration for an Intel server. # Includes the basic configuration for an Intel server.
imports = [ imports = [
./base/agenix.nix ./base/agenix.nix
./base/always-power-on.nix
./base/august-shutdown.nix ./base/august-shutdown.nix
./base/boot.nix ./base/boot.nix
./base/env.nix ./base/env.nix

View File

@@ -1,8 +0,0 @@
{
imports = [
../../module/power-policy.nix
];
# Turn on as soon as we have power
power.policy = "always-on";
}

View File

@@ -1,12 +1,12 @@
{ {
# Shutdown all machines on August 3rd at 22:00, so we can protect the # Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
# hardware from spurious electrical peaks on the yearly electrical cut for # hardware from spurious electrical peaks on the yearly electrical cut for
# manteinance that starts on August 4th. # manteinance that starts on August 4th.
systemd.timers.august-shutdown = { systemd.timers.august-shutdown = {
description = "Shutdown on August 3rd for maintenance"; description = "Shutdown on August 2nd for maintenance";
wantedBy = [ "timers.target" ]; wantedBy = [ "timers.target" ];
timerConfig = { timerConfig = {
OnCalendar = "*-08-03 22:00:00"; OnCalendar = "*-08-02 11:00:00";
RandomizedDelaySec = "10min"; RandomizedDelaySec = "10min";
Unit = "systemd-poweroff.service"; Unit = "systemd-poweroff.service";
}; };

View File

@@ -3,8 +3,8 @@
{ {
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv git-lfs ncdu config.boot.kernelPackages.perf ldns
# From bsckgs overlay # From bsckgs overlay
osumb osumb
]; ];
@@ -21,8 +21,6 @@
} }
]; ];
environment.enableAllTerminfo = true;
environment.variables = { environment.variables = {
EDITOR = "vim"; EDITOR = "vim";
VISUAL = "vim"; VISUAL = "vim";

View File

@@ -1,4 +1,4 @@
{ pkgs, lib, ... }: { pkgs, ... }:
{ {
networking = { networking = {
@@ -10,9 +10,6 @@
allowedTCPPorts = [ 22 ]; allowedTCPPorts = [ 22 ];
}; };
# Make sure we use iptables
nftables.enable = lib.mkForce false;
hosts = { hosts = {
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ]; "84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
"84.88.51.152" = [ "raccoon" ]; "84.88.51.152" = [ "raccoon" ];

View File

@@ -6,8 +6,6 @@
(import ../../../pkgs/overlay.nix) (import ../../../pkgs/overlay.nix)
]; ];
nixpkgs.config.allowUnfree = true;
nix = { nix = {
nixPath = [ nixPath = [
"nixpkgs=${nixpkgs}" "nixpkgs=${nixpkgs}"
@@ -25,7 +23,6 @@
trusted-users = [ "@wheel" ]; trusted-users = [ "@wheel" ];
flake-registry = pkgs.writeText "global-registry.json" flake-registry = pkgs.writeText "global-registry.json"
''{"flakes":[],"version":2}''; ''{"flakes":[],"version":2}'';
keep-outputs = true;
}; };
gc = { gc = {

View File

@@ -8,10 +8,20 @@ in
# Enable the OpenSSH daemon. # Enable the OpenSSH daemon.
services.openssh.enable = true; services.openssh.enable = true;
# Connect to intranet git hosts via proxy
programs.ssh.extraConfig = ''
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
# Connect to BSC machines via hut proxy too
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p
'';
programs.ssh.knownHosts = hostsKeys // { programs.ssh.knownHosts = hostsKeys // {
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; "glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz"; "glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
}; };

View File

@@ -20,7 +20,6 @@
rarias = { rarias = {
uid = 1880; uid = 1880;
isNormalUser = true; isNormalUser = true;
linger = true;
home = "/home/Computational/rarias"; home = "/home/Computational/rarias";
description = "Rodrigo Arias"; description = "Rodrigo Arias";
group = "Computational"; group = "Computational";
@@ -40,7 +39,7 @@
home = "/home/Computational/arocanon"; home = "/home/Computational/arocanon";
description = "Aleix Roca"; description = "Aleix Roca";
group = "Computational"; group = "Computational";
extraGroups = [ "wheel" "tracing" ]; extraGroups = [ "wheel" ];
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
@@ -56,7 +55,7 @@
home = "/home/Computational/rpenacob"; home = "/home/Computational/rpenacob";
description = "Raúl Peñacoba"; description = "Raúl Peñacoba";
group = "Computational"; group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ]; hosts = [ "owl1" "owl2" "hut" ];
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
@@ -69,10 +68,10 @@
home = "/home/Computational/anavarro"; home = "/home/Computational/anavarro";
description = "Antoni Navarro"; description = "Antoni Navarro";
group = "Computational"; group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ]; hosts = [ "hut" "raccoon" "fox" ];
hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1"; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
]; ];
}; };
@@ -82,7 +81,7 @@
home = "/home/Computational/abonerib"; home = "/home/Computational/abonerib";
description = "Aleix Boné"; description = "Aleix Boné";
group = "Computational"; group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ]; hosts = [ "owl1" "owl2" "hut" "raccoon" ];
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
@@ -95,7 +94,7 @@
home = "/home/Computational/vlopez"; home = "/home/Computational/vlopez";
description = "Victor López"; description = "Victor López";
group = "Computational"; group = "Computational";
hosts = [ "apex" "koro" ]; hosts = [ "koro" ];
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0"; hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
@@ -108,7 +107,7 @@
home = "/home/Computational/dbautist"; home = "/home/Computational/dbautist";
description = "Dylan Bautista Cases"; description = "Dylan Bautista Cases";
group = "Computational"; group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" ]; hosts = [ "hut" ];
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/"; hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
@@ -121,58 +120,16 @@
home = "/home/Computational/dalvare1"; home = "/home/Computational/dalvare1";
description = "David Álvarez"; description = "David Álvarez";
group = "Computational"; group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ]; hosts = [ "hut" "fox" ];
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
]; ];
}; };
varcila = {
uid = 5650;
isNormalUser = true;
home = "/home/Computational/varcila";
description = "Vincent Arcila";
group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ];
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
};
pmartin1 = {
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
uid = 9652;
isNormalUser = true;
home = "/home/Computational/pmartin1";
description = "Pedro J. Martinez-Ferrer";
group = "Computational";
hosts = [ "fox" ];
hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl.";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
];
};
csiringo = {
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
uid = 9653;
isNormalUser = true;
home = "/home/Computational/csiringo";
description = "Cesare Siringo";
group = "Computational";
hosts = [ "apex" "weasel" ];
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
];
};
}; };
groups = { groups = {
Computational = { gid = 564; }; Computational = { gid = 564; };
tracing = { };
}; };
}; };
} }

View File

@@ -1,10 +0,0 @@
{
# Provides the base system for a xeon node in the SSF rack.
imports = [
./xeon.nix
./ssf/fs.nix
./ssf/hosts.nix
./ssf/net.nix
./ssf/ssh.nix
];
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
networking.hosts = {
# Login
"10.0.40.30" = [ "apex" ];
# Storage
"10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ];
"10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
"10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ];
# Xeon compute
"10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ];
"10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ];
"10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
#"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ];
"10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ];
"10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
"10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ];
"10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ];
};
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
};
}

View File

@@ -1,16 +0,0 @@
{
# Use SSH tunnel to apex to reach internal hosts
programs.ssh.extraConfig = ''
Host tent
ProxyJump raccoon
# Access raccoon via the HTTP proxy
Host raccoon knights3.bsc.es
HostName knights3.bsc.es
ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
# Make sure we can reach gitlab even if we don't have SSH access to raccoon
Host bscpm04.bsc.es gitlab-internal.bsc.es
ProxyCommand=ssh apex 'nc -X connect -x localhost:23080 %h %p'
'';
}

View File

@@ -1,7 +1,9 @@
{ {
# Provides the base system for a xeon node, not necessarily in the SSF rack. # Provides the base system for a xeon node.
imports = [ imports = [
./base.nix ./base.nix
./xeon/fs.nix
./xeon/console.nix ./xeon/console.nix
./xeon/net.nix
]; ];
} }

94
m/common/xeon/net.nix Normal file
View File

@@ -0,0 +1,94 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
proxy = {
default = "http://hut:23080/";
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
# https://github.com/golang/go/issues/16715
allProxy = null;
};
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
extraHosts = ''
10.0.40.30 ssfhead
# Node Entry for node: mds01 (ID=72)
10.0.40.40 bay mds01 mds01-eth0
10.0.42.40 bay-ib mds01-ib0
10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi
# Node Entry for node: oss01 (ID=73)
10.0.40.41 oss01 oss01-eth0
10.0.42.41 oss01-ib0
10.0.40.142 oss01-ipmi0 oss01-ipmi
# Node Entry for node: oss02 (ID=74)
10.0.40.42 lake2 oss02 oss02-eth0
10.0.42.42 lake2-ib oss02-ib0
10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi
# Node Entry for node: xeon01 (ID=15)
10.0.40.1 owl1 xeon01 xeon01-eth0
10.0.42.1 owl1-ib xeon01-ib0
10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi
# Node Entry for node: xeon02 (ID=16)
10.0.40.2 owl2 xeon02 xeon02-eth0
10.0.42.2 owl2-ib xeon02-ib0
10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi
# Node Entry for node: xeon03 (ID=17)
10.0.40.3 xeon03 xeon03-eth0
10.0.42.3 xeon03-ib0
10.0.40.103 xeon03-ipmi0 xeon03-ipmi
# Node Entry for node: xeon04 (ID=18)
10.0.40.4 xeon04 xeon04-eth0
10.0.42.4 xeon04-ib0
10.0.40.104 xeon04-ipmi0 xeon04-ipmi
# Node Entry for node: xeon05 (ID=19)
10.0.40.5 koro xeon05 xeon05-eth0
10.0.42.5 koro-ib xeon05-ib0
10.0.40.105 koro-ipmi xeon05-ipmi0
# Node Entry for node: xeon06 (ID=20)
10.0.40.6 xeon06 xeon06-eth0
10.0.42.6 xeon06-ib0
10.0.40.106 xeon06-ipmi0 xeon06-ipmi
# Node Entry for node: xeon07 (ID=21)
10.0.40.7 hut xeon07 xeon07-eth0
10.0.42.7 hut-ib xeon07-ib0
10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi
# Node Entry for node: xeon08 (ID=22)
10.0.40.8 eudy xeon08 xeon08-eth0
10.0.42.8 eudy-ib xeon08-ib0
10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi
# fox
10.0.40.26 fox
10.0.40.126 fox-ipmi
'';
};
}

View File

@@ -2,7 +2,7 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix") #(modulesPath + "/installer/netboot/netboot-minimal.nix")
./kernel/kernel.nix ./kernel/kernel.nix

View File

@@ -2,18 +2,13 @@
{ {
imports = [ imports = [
../common/base.nix ../common/xeon.nix
../common/xeon/console.nix ../module/ceph.nix
../module/emulation.nix ../module/emulation.nix
../module/nvidia.nix
../module/slurm-client.nix ../module/slurm-client.nix
./wireguard.nix ../module/slurm-firewall.nix
]; ];
# Don't turn off on August as UPC has different dates.
# Fox works fine on power cuts.
systemd.timers.august-shutdown.enable = false;
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103"; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
@@ -21,71 +16,30 @@
swapDevices = lib.mkForce []; swapDevices = lib.mkForce [];
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
boot.kernelModules = [ "kvm-amd" "amd_uncore" ]; boot.kernelModules = [ "kvm-amd" ];
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
hardware.cpu.intel.updateMicrocode = lib.mkForce false; hardware.cpu.intel.updateMicrocode = lib.mkForce false;
# Use performance for benchmarks
powerManagement.cpuFreqGovernor = "performance";
# Disable NUMA balancing
boot.kernel.sysctl."kernel.numa_balancing" = 0;
# Expose kernel addresses
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
services.openssh.settings.X11Forwarding = true;
services.fail2ban.enable = true;
# Use SSH tunnel to reach internal hosts
programs.ssh.extraConfig = ''
Host bscpm04.bsc.es gitlab-internal.bsc.es tent
ProxyJump raccoon
Host raccoon
ProxyJump apex
HostName 127.0.0.1
Port 22022
'';
networking = { networking = {
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
hostName = "fox"; hostName = "fox";
# UPC network (may change over time, use DHCP) interfaces.enp1s0f0np0.ipv4.addresses = [ {
# Public IP configuration: address = "10.0.40.26";
# - Hostname: fox.ac.upc.edu prefixLength = 24;
# - IP: 147.83.30.141 } ];
# - Gateway: 147.83.30.130
# - NetMask: 255.255.255.192
# Private IP configuration for BMC:
# - Hostname: fox-ipmi.ac.upc.edu
# - IP: 147.83.35.27
# - Gateway: 147.83.35.2
# - NetMask: 255.255.255.0
interfaces.enp1s0f0np0.useDHCP = true;
}; };
# Use hut for cache # Configure Nvidia driver to use with CUDA
nix.settings = { hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
extra-substituters = [ "https://jungle.bsc.es/cache" ]; hardware.graphics.enable = true;
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; nixpkgs.config.allowUnfree = true;
}; nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# Recommended for new graphics cards
hardware.nvidia.open = true;
# Mount NVME disks # Mount NVME disks
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; }; fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
# Mount the NFS home
fileSystems."/nfs/home" = {
device = "10.106.0.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Make a /nvme{0,1}/$USER directory for each user. # Make a /nvme{0,1}/$USER directory for each user.
systemd.services.create-nvme-dirs = let systemd.services.create-nvme-dirs = let
# Take only normal users in fox # Take only normal users in fox

View File

@@ -1,46 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgFox.file = ../../secrets/wg-fox.age;
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
# Determines the IP address and subnet of the server's end of the tunnel interface.
ips = [ "10.106.0.1/24" ];
# The port that WireGuard listens to. Must be accessible by the client.
listenPort = 666;
# Path to the private key file.
privateKeyFile = config.age.secrets.wgFox.path;
# Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
peers = [
# List of allowed peers.
{
name = "Apex";
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
allowedIPs = [ "10.106.0.30/32" ];
}
];
};
};
networking.hosts = {
"10.106.0.30" = [ "apex" ];
};
networking.firewall = {
extraCommands = ''
# Accept slurm connections to slurmd from apex (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
'';
};
}

View File

@@ -3,12 +3,160 @@ modules:
prober: http prober: http
timeout: 5s timeout: 5s
http: http:
proxy_url: "http://127.0.0.1:23080"
skip_resolve_phase_with_proxy: true
follow_redirects: true follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes: [] # Defaults to 2xx valid_status_codes: [] # Defaults to 2xx
method: GET method: GET
http_with_proxy:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
skip_resolve_phase_with_proxy: true
http_with_proxy_and_headers:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
proxy_connect_header:
Proxy-Authorization:
- Bearer token
http_post_2xx:
prober: http
timeout: 5s
http:
method: POST
headers:
Content-Type: application/json
body: '{}'
http_post_body_file:
prober: http
timeout: 5s
http:
method: POST
body_file: "/files/body.txt"
http_basic_auth_example:
prober: http
timeout: 5s
http:
method: POST
headers:
Host: "login.example.com"
basic_auth:
username: "username"
password: "mysecret"
http_2xx_oauth_client_credentials:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes:
- 200
- 201
oauth2:
client_id: "client_id"
client_secret: "client_secret"
token_url: "https://api.example.com/token"
endpoint_params:
grant_type: "client_credentials"
http_custom_ca_example:
prober: http
http:
method: GET
tls_config:
ca_file: "/certs/my_cert.crt"
http_gzip:
prober: http
http:
method: GET
compression: gzip
http_gzip_with_accept_encoding:
prober: http
http:
method: GET
compression: gzip
headers:
Accept-Encoding: gzip
tls_connect:
prober: tcp
timeout: 5s
tcp:
tls: true
tcp_connect_example:
prober: tcp
timeout: 5s
imap_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "OK.*STARTTLS"
- send: ". STARTTLS"
- expect: "OK"
- starttls: true
- send: ". capability"
- expect: "CAPABILITY IMAP4rev1"
smtp_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "^220 ([^ ]+) ESMTP (.+)$"
- send: "EHLO prober\r"
- expect: "^250-STARTTLS"
- send: "STARTTLS\r"
- expect: "^220"
- starttls: true
- send: "EHLO prober\r"
- expect: "^250-AUTH"
- send: "QUIT\r"
irc_banner_example:
prober: tcp
timeout: 5s
tcp:
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp: icmp:
prober: icmp prober: icmp
timeout: 5s timeout: 5s
icmp: icmp:
preferred_ip_protocol: "ip4" preferred_ip_protocol: "ip4"
dns_udp_example:
prober: dns
timeout: 5s
dns:
query_name: "www.prometheus.io"
query_type: "A"
valid_rcodes:
- NOERROR
validate_answer_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
fail_if_all_match_regexp:
- ".*127.0.0.1"
fail_if_not_matches_regexp:
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
fail_if_none_matches_regexp:
- "127.0.0.1"
validate_authority_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
validate_additional_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
dns_soa:
prober: dns
dns:
query_name: "prometheus.io"
query_type: "SOA"
dns_tcp_example:
prober: dns
dns:
transport_protocol: "tcp" # defaults to "udp"
preferred_ip_protocol: "ip4" # defaults to "ip6"
query_name: "www.prometheus.io"

View File

@@ -2,14 +2,16 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
../module/ceph.nix ../module/ceph.nix
../module/debuginfod.nix ../module/debuginfod.nix
../module/emulation.nix ../module/emulation.nix
../module/slurm-client.nix
./gitlab-runner.nix ./gitlab-runner.nix
./monitoring.nix ./monitoring.nix
./nfs.nix ./nfs.nix
./slurm-server.nix
./nix-serve.nix ./nix-serve.nix
./public-inbox.nix ./public-inbox.nix
./gitea.nix ./gitea.nix
@@ -54,11 +56,6 @@
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
''; '';
# Flush all rules and chains on stop so it won't break on start
extraStopCommands = ''
iptables -F
iptables -X
'';
}; };
}; };

View File

@@ -22,8 +22,8 @@
"--docker-network-mode host" "--docker-network-mode host"
]; ];
environmentVariables = { environmentVariables = {
https_proxy = "http://hut:23080"; https_proxy = "http://localhost:23080";
http_proxy = "http://hut:23080"; http_proxy = "http://localhost:23080";
}; };
}; };
in { in {
@@ -38,13 +38,14 @@
gitlab-bsc-docker = { gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism # gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path; registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
tagList = [ "docker" "hut" ];
environmentVariables = { environmentVariables = {
# We cannot access the hut local interface from docker, so we connect https_proxy = "http://localhost:23080";
# to hut directly via the ethernet one. http_proxy = "http://localhost:23080";
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
}; };
# FIXME
registrationFlags = [
"--docker-network-mode host"
];
executor = "docker"; executor = "docker";
dockerImage = "alpine"; dockerImage = "alpine";
dockerVolumes = [ dockerVolumes = [
@@ -52,15 +53,7 @@
"/nix/var/nix/db:/nix/var/nix/db:ro" "/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
]; ];
dockerExtraHosts = [
# Required to pass the proxy via hut
"hut:10.0.40.7"
];
dockerDisableCache = true; dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" '' preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots mkdir -p -m 0755 /nix/var/nix/gcroots
@@ -73,39 +66,32 @@
mkdir -p -m 0700 "$HOME/.nix-defexpr" mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh" mkdir -p -m 0700 "$HOME/.ssh"
cat > "$HOME/.ssh/config" << EOF cat > "$HOME/.ssh/config" << EOF
Host bscpm04.bsc.es gitlab-internal.bsc.es Host bscpm03.bsc.es gitlab-internal.bsc.es
User git User git
ProxyCommand nc -X connect -x hut:23080 %h %p ProxyCommand nc -X connect -x hut:23080 %h %p
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p ProxyCommand nc -X connect -x hut:23080 %h %p
EOF EOF
cat >> "$HOME/.ssh/known_hosts" << EOF cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT bscpm03.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh . ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths ${pkgs.nix}/bin/nix-channel --add https://nixos.org/channels/nixos-24.11 nixpkgs
. ${pkgs.cacert}/nix-support/setup-hook ${pkgs.nix}/bin/nix-channel --update nixpkgs
${pkgs.nix}/bin/nix-env -i ${lib.concatStringsSep " " (with pkgs; [ nix cacert git openssh netcat curl ])}
''; '';
environmentVariables = { environmentVariables = {
ENV = "/etc/profile"; ENV = "/etc/profile";
USER = "root"; USER = "root";
NIX_REMOTE = "daemon"; NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; PATH = "/nix/var/nix/profiles/default/bin:/nix/var/nix/profiles/default/sbin:/bin:/sbin:/usr/bin:/usr/sbin";
NIX_SSL_CERT_FILE = "/nix/var/nix/profiles/default/etc/ssl/certs/ca-bundle.crt";
}; };
}; };
}; };
}; };
# DOCKER* chains are useless, override at FORWARD and nixos-fw
networking.firewall.extraCommands = ''
# Don't forward any traffic from docker
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
# Allow incoming traffic from docker to 23080
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
'';
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";

View File

@@ -3,10 +3,7 @@
{ {
imports = [ imports = [
../module/slurm-exporter.nix ../module/slurm-exporter.nix
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
./gpfs-probe.nix ./gpfs-probe.nix
../module/nix-daemon-exporter.nix
]; ];
age.secrets.grafanaJungleRobotPassword = { age.secrets.grafanaJungleRobotPassword = {
@@ -49,7 +46,7 @@
services.prometheus = { services.prometheus = {
enable = true; enable = true;
port = 9001; port = 9001;
retentionTime = "5y"; retentionTime = "1y";
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
@@ -79,7 +76,7 @@
group = "root"; group = "root";
user = "root"; user = "root";
configFile = config.age.secrets.ipmiYml.path; configFile = config.age.secrets.ipmiYml.path;
# extraFlags = [ "--log.level=debug" ]; extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
node = { node = {
@@ -111,9 +108,6 @@
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
"127.0.0.1:9341" # Slurm exporter "127.0.0.1:9341" # Slurm exporter
"127.0.0.1:9966" # GPFS custom exporter "127.0.0.1:9966" # GPFS custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
]; ];
}]; }];
@@ -169,9 +163,6 @@
"8.8.8.8" "8.8.8.8"
"ssfhead" "ssfhead"
"anella-bsc.cesca.cat" "anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"arenys5.ac.upc.edu"
]; ];
}]; }];
relabel_configs = [ relabel_configs = [
@@ -260,12 +251,15 @@
}; };
} }
{ {
job_name = "raccoon"; job_name = "ipmi-fox";
metrics_path = "/ipmi";
static_configs = [ static_configs = [
{ { targets = [ "127.0.0.1:9290" ]; }
targets = [ "127.0.0.1:19002" ]; # Node exporter
}
]; ];
params = {
target = [ "fox-ipmi" ];
module = [ "fox" ];
};
} }
]; ];
}; };

View File

@@ -12,19 +12,16 @@ let
installPhase = '' installPhase = ''
cp -r public $out cp -r public $out
''; '';
# Don't mess doc/
dontFixup = true;
}; };
in in
{ {
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = { services.nginx = {
enable = true; enable = true;
virtualHosts."jungle.bsc.es" = { virtualHosts."jungle.bsc.es" = {
root = "${website}"; root = "${website}";
listen = [ listen = [
{ {
addr = "0.0.0.0"; addr = "127.0.0.1";
port = 80; port = 80;
} }
]; ];
@@ -41,7 +38,7 @@ in
proxy_redirect http:// $scheme://; proxy_redirect http:// $scheme://;
} }
location /cache { location /cache {
rewrite ^/cache/(.*) /$1 break; rewrite ^/cache(.*) /$1 break;
proxy_pass http://127.0.0.1:5000; proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://; proxy_redirect http:// $scheme://;
} }

7
m/hut/slurm-server.nix Normal file
View File

@@ -0,0 +1,7 @@
{ ... }:
{
services.slurm = {
server.enable = true;
};
}

View File

@@ -4,7 +4,7 @@
- xeon03-ipmi - xeon03-ipmi
- xeon04-ipmi - xeon04-ipmi
- koro-ipmi - koro-ipmi
- weasel-ipmi - xeon06-ipmi
- hut-ipmi - hut-ipmi
- eudy-ipmi - eudy-ipmi
# Storage # Storage

View File

@@ -2,7 +2,7 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix") #(modulesPath + "/installer/netboot/netboot-minimal.nix")
../eudy/cpufreq.nix ../eudy/cpufreq.nix

View File

@@ -2,7 +2,7 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
../module/monitoring.nix ../module/monitoring.nix
]; ];

View File

@@ -1,70 +0,0 @@
{
# In physical order from top to bottom (see note below)
ssf = {
# Switches for Ethernet and OmniPath
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
switch-opa = { pos=41; size=1; };
# SSF login
apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
# Storage
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
# Compute xeon
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
# Slot 34 empty
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
# 16 KNL nodes, 4 per chassis
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
# Slot 19 empty
# EPI (hw team, guessed order)
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
# These are old and decommissioned (off)
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
};
bsc2218 = {
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
};
upc = {
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
};
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
# bottom. Example:
#
# | ... | - [pos+size] <--- Label in chassis
# +--------+
# | node | - [pos+1]
# | 2U | - [pos]
# +------- +
# | ... | - [pos-1]
#
# NOTE: The board and sn refers to the FRU information (Board Product and
# Board Serial) via `ipmitool fru print 0`.
}

View File

@@ -1,10 +0,0 @@
{ config, ... }:
{
nix.settings =
# Don't add hut as a cache to itself
assert config.networking.hostName != "hut";
{
extra-substituters = [ "http://hut/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-meteocat-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
};
};
}

View File

@@ -1,26 +0,0 @@
#!/bin/sh
# Locate nix daemon pid
nd=$(pgrep -o nix-daemon)
# Locate children of nix-daemon
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
# For each children, locate 2nd level children
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP nix_daemon_build Nix daemon derivation build state.
# TYPE nix_daemon_build gauge
EOF
for pid in $pids2; do
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
user=$(ps -o uname= -p "$pid")
if [ -n "$name" -a -n "$user" ]; then
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
fi
done

View File

@@ -1,23 +0,0 @@
{ pkgs, config, lib, ... }:
let
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
''
cp ${./nix-daemon-builds.sh} $out;
chmod +x $out
''
;
in
{
systemd.services.nix-daemon-exporter = {
description = "Daemon to export nix-daemon metrics";
path = [ pkgs.procps pkgs.ripgrep ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
# Needed root to read the environment, potentially unsafe
User = "root";
Group = "root";
};
};
}

View File

@@ -1,20 +0,0 @@
{ lib, config, pkgs, ... }:
{
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
hardware.graphics.enable = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# enable support for derivations which require nvidia-gpu to be available
# > requiredSystemFeatures = [ "cuda" ];
programs.nix-required-mounts.enable = true;
programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
# They forgot to add the symlink
programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
];
environment.systemPackages = [ pkgs.cudainfo ];
}

View File

@@ -1,68 +0,0 @@
{ config, lib, pkgs, ... }:
let
cfg = config.services.p;
in
{
options = {
services.p = {
enable = lib.mkOption {
type = lib.types.bool;
default = false;
description = "Whether to enable the p service.";
};
path = lib.mkOption {
type = lib.types.str;
default = "/var/lib/p";
description = "Where to save the pasted files on disk.";
};
url = lib.mkOption {
type = lib.types.str;
default = "https://jungle.bsc.es/p";
description = "URL prefix for the printed file.";
};
};
};
config = lib.mkIf cfg.enable {
environment.systemPackages = let
p = pkgs.writeShellScriptBin "p" ''
set -e
pastedir="${cfg.path}/$USER"
cd "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "${cfg.url}/$USER/$out"
'';
in [ p ];
systemd.services.p = let
# Take only normal users
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
# Create a directory for each user
commands = lib.concatLists (lib.mapAttrsToList (_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}"
]) users);
in {
description = "P service setup";
requires = [ "network-online.target" ];
#wants = [ "remote-fs.target" ];
#after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = pkgs.writeShellScript "p-init.sh" (''
install -d -o root -g root -m 0755 ${cfg.path}
'' + (lib.concatLines commands));
};
};
};
}

View File

@@ -1,33 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.power.policy;
in
{
options = {
power.policy = mkOption {
type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
default = null;
description = "Set power policy to use via IPMI.";
};
};
config = mkIf (cfg != null) {
systemd.services."power-policy" = {
description = "Set power policy to use via IPMI";
wantedBy = [ "multi-user.target" ];
unitConfig = {
StartLimitBurst = "10";
StartLimitIntervalSec = "10m";
};
serviceConfig = {
ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
Type = "oneshot";
Restart = "on-failure";
RestartSec = "5s";
};
};
};
}

View File

@@ -1,10 +1,49 @@
{ lib, ... }: { config, pkgs, lib, ... }:
{ let
imports = [ suspendProgram = pkgs.writeScript "suspend.sh" ''
./slurm-common.nix #!/usr/bin/env bash
]; exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeScript "resume.sh" ''
#!/usr/bin/env bash
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
prolog = pkgs.writeScript "prolog.sh" ''
#!/usr/bin/env bash
echo "hello from the prolog"
exit 0
'';
epilog = pkgs.writeScript "epilog.sh" ''
#!/usr/bin/env bash
echo "hello from the epilog"
exit 0
'';
in {
systemd.services.slurmd.serviceConfig = { systemd.services.slurmd.serviceConfig = {
# Kill all processes in the control group on stop/restart. This will kill # Kill all processes in the control group on stop/restart. This will kill
# all the jobs running, so ensure that we only upgrade when the nodes are # all the jobs running, so ensure that we only upgrade when the nodes are
@@ -14,5 +53,93 @@
KillMode = lib.mkForce "control-group"; KillMode = lib.mkForce "control-group";
}; };
services.slurm.client.enable = true; services.slurm = {
client.enable = true;
controlMachine = "hut";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=2 Feature=fox"
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=hut,fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
} }

View File

@@ -1,115 +0,0 @@
{ config, pkgs, ... }:
let
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeShellScript "resume.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
services.slurm = {
controlMachine = "apex";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
}

View File

@@ -1,23 +0,0 @@
{ ... }:
{
imports = [
./slurm-common.nix
];
services.slurm.server.enable = true;
networking.firewall = {
extraCommands = ''
# Accept slurm connections to controller from compute nodes
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from compute nodes for srun
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
# Accept slurm connections to controller from fox (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from fox for srun (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
'';
};
}

View File

@@ -1,8 +0,0 @@
{
programs.ssh.extraConfig = ''
Host apex ssfhead
HostName ssflogin.bsc.es
Host hut
ProxyJump apex
'';
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-upc-qaire-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
};
};
}

View File

@@ -1,35 +0,0 @@
{config, ...}:
{
age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age;
age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age;
services.openvpn.servers = {
# systemctl status openvpn-dac.service
dac = {
config = ''
client
dev tun
proto tcp
remote vpn.ac.upc.edu 1194
remote vpn.ac.upc.edu 80
resolv-retry infinite
nobind
persist-key
persist-tun
ca ${./vpn-dac/ca.crt}
cert ${./vpn-dac/client.crt}
# Only key needs to be secret
key ${config.age.secrets.vpn-dac-client-key.path}
remote-cert-tls server
comp-lzo
verb 3
auth-user-pass ${config.age.secrets.vpn-dac-login.path}
reneg-sec 0
# Only route fox-ipmi
pull-filter ignore "route "
route 147.83.35.27 255.255.255.255
'';
};
};
}

View File

@@ -1,31 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD
VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx
LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk
MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM
Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw
Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE
BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w
KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi
BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB
QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu
ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC
F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D
6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF
LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9
V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx
L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH
LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q
MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL
MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs
b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu
eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE
AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh
Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF
AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1
6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/
oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q
+oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3
4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z
TQQx5NqM7B5jjak5fauR3/oRD8XXsA==
-----END CERTIFICATE-----

View File

@@ -1,100 +0,0 @@
Certificate:
Data:
Version: 3 (0x2)
Serial Number: 2 (0x2)
Signature Algorithm: sha256WithRSAEncryption
Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
Validity
Not Before: Jan 12 12:45:41 2016 GMT
Not After : Jan 12 12:45:41 2046 GMT
Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu
Subject Public Key Info:
Public Key Algorithm: rsaEncryption
Public-Key: (2048 bit)
Modulus:
00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64:
c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6:
56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7:
19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58:
48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83:
dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9:
2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa:
6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5:
b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f:
60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec:
76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa:
dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d:
5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb:
6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e:
fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00:
17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93:
71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55:
f9:41
Exponent: 65537 (0x10001)
X509v3 extensions:
X509v3 Basic Constraints:
CA:FALSE
Netscape Comment:
Easy-RSA Generated Certificate
X509v3 Subject Key Identifier:
1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18
X509v3 Authority Key Identifier:
keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA
DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
serial:91:F5:D7:C3:C0:A6:4E:61
X509v3 Extended Key Usage:
TLS Web Client Authentication
X509v3 Key Usage:
Digital Signature
X509v3 Subject Alternative Name:
DNS:client
Signature Algorithm: sha256WithRSAEncryption
42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea:
3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7:
26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0:
31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1:
01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a:
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4:
02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3:
40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3:
4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9:
1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33:
75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72:
1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33:
87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62:
a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4:
bf:23:f6:7d
-----BEGIN CERTIFICATE-----
MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx
EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK
EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT
G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN
MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X
DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw
EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk
VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB
cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG
A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S
O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL
Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc
n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc
IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl
60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB
o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu
ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD
GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx
CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl
bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1
bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV
BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA
YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud
DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo
ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx
3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk
g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m
lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck
iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU
CzHGPDS3/FoM5L8j9n0=
-----END CERTIFICATE-----

View File

@@ -2,13 +2,12 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
../module/ceph.nix ../module/ceph.nix
../module/emulation.nix ../module/emulation.nix
../module/slurm-client.nix ../module/slurm-client.nix
../module/slurm-firewall.nix ../module/slurm-firewall.nix
../module/debuginfod.nix ../module/debuginfod.nix
../module/hut-substituter.nix
]; ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches

View File

@@ -2,13 +2,12 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/xeon.nix
../module/ceph.nix ../module/ceph.nix
../module/emulation.nix ../module/emulation.nix
../module/slurm-client.nix ../module/slurm-client.nix
../module/slurm-firewall.nix ../module/slurm-firewall.nix
../module/debuginfod.nix ../module/debuginfod.nix
../module/hut-substituter.nix
]; ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches

View File

@@ -3,11 +3,6 @@
{ {
imports = [ imports = [
../common/base.nix ../common/base.nix
../module/emulation.nix
../module/debuginfod.nix
../module/ssh-hut-extern.nix
../module/nvidia.nix
../eudy/kernel/perf.nix
]; ];
# Don't install Grub on the disk yet # Don't install Grub on the disk yet
@@ -28,39 +23,14 @@
address = "84.88.51.152"; address = "84.88.51.152";
prefixLength = 25; prefixLength = 25;
} ]; } ];
interfaces.enp5s0f1.ipv4.addresses = [ {
address = "10.0.44.1";
prefixLength = 24;
} ];
nat = {
enable = true;
internalInterfaces = [ "enp5s0f1" ];
externalInterface = "eno0";
};
hosts = {
"10.0.44.4" = [ "tent" ];
"84.88.53.236" = [ "apex" ];
};
}; };
nix.settings = { # Configure Nvidia driver to use with CUDA
extra-substituters = [ "https://jungle.bsc.es/cache" ]; hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ]; hardware.graphics.enable = true;
}; nixpkgs.config.allowUnfree = true;
nixpkgs.config.nvidia.acceptLicense = true;
# Enable performance governor services.xserver.videoDrivers = [ "nvidia" ];
powerManagement.cpuFreqGovernor = "performance";
hardware.nvidia.open = false; # Maxwell is older than Turing architecture
services.openssh.settings.X11Forwarding = true;
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
users.motd = '' users.motd = ''

View File

@@ -1,14 +0,0 @@
modules:
http_2xx:
prober: http
timeout: 5s
http:
preferred_ip_protocol: "ip4"
follow_redirects: true
valid_status_codes: [] # Defaults to 2xx
method: GET
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"

View File

@@ -1,83 +0,0 @@
{ config, pkgs, lib, ... }:
{
imports = [
../common/xeon.nix
../module/emulation.nix
../module/debuginfod.nix
../module/ssh-hut-extern.nix
./monitoring.nix
./nginx.nix
./nix-serve.nix
./gitlab-runner.nix
./gitea.nix
../hut/public-inbox.nix
../hut/msmtp.nix
../module/p.nix
../module/vpn-dac.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
networking = {
hostName = "tent";
interfaces.eno1.ipv4.addresses = [
{
address = "10.0.44.4";
prefixLength = 24;
}
];
# Only BSC DNSs seem to be reachable from the office VLAN
nameservers = [ "84.88.52.35" "84.88.52.36" ];
search = [ "bsc.es" "ac.upc.edu" ];
defaultGateway = "10.0.44.1";
hosts = {
"84.88.53.236" = [ "apex" ];
};
};
services.p.enable = true;
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
boot.swraid = {
enable = true;
mdadmConf = ''
DEVICE partitions
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
MAILADDR root
'';
};
fileSystems."/vault" = {
device = "/dev/disk/by-label/vault";
fsType = "ext4";
};
# Make a /vault/$USER directory for each user.
systemd.services.create-vault-dirs = let
# Take only normal users in tent
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}"
]) users);
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# disable automatic garbage collector
nix.gc.automatic = lib.mkForce false;
}

View File

@@ -1,32 +0,0 @@
{ config, lib, ... }:
{
services.gitea = {
enable = true;
appName = "Gitea in the jungle";
settings = {
server = {
ROOT_URL = "https://jungle.bsc.es/git/";
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
LANDING_PAGE = "explore";
};
metrics.ENABLED = true;
service = {
DISABLE_REGISTRATION = true;
REGISTER_MANUAL_CONFIRM = true;
ENABLE_NOTIFY_MAIL = true;
};
log.LEVEL = "Warn";
mailer = {
ENABLED = true;
FROM = "jungle-robot@bsc.es";
PROTOCOL = "sendmail";
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
SENDMAIL_ARGS = "--";
};
};
lfs.enable = true;
};
}

View File

@@ -1,93 +0,0 @@
{ pkgs, lib, config, ... }:
{
age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age;
age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age;
age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age;
services.gitlab-runner = let sec = config.age.secrets; in {
enable = true;
settings.concurrent = 5;
services = {
# For gitlab.pm.bsc.es
gitlab-pm-shell = {
executor = "shell";
environmentVariables = {
SHELL = "${pkgs.bash}/bin/bash";
};
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path;
preGetSourcesScript = pkgs.writeScript "setup" ''
echo "This is the preGetSources script running, brace for impact"
env
'';
};
gitlab-pm-docker = {
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path;
executor = "docker";
dockerImage = "debian:stable";
};
# For gitlab.bsc.es
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path;
tagList = [ "docker" "tent" "nix" ];
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = {
ENV = "/etc/profile";
USER = "root";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
};
};
};
};
systemd.services.gitlab-runner.serviceConfig = {
DynamicUser = lib.mkForce false;
User = "gitlab-runner";
Group = "gitlab-runner";
ExecStart = lib.mkForce
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
};
users.users.gitlab-runner = {
uid = config.ids.uids.gitlab-runner;
home = "/var/lib/gitlab-runner";
description = "Gitlab Runner";
group = "gitlab-runner";
extraGroups = [ "docker" ];
createHome = true;
};
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
}

View File

@@ -1,217 +0,0 @@
{ config, lib, pkgs, ... }:
{
imports = [
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
../module/nix-daemon-exporter.nix
];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
owner = "grafana";
mode = "400";
};
services.grafana = {
enable = true;
settings = {
server = {
domain = "jungle.bsc.es";
root_url = "%(protocol)s://%(domain)s/grafana";
serve_from_sub_path = true;
http_port = 2342;
http_addr = "127.0.0.1";
};
smtp = {
enabled = true;
from_address = "jungle-robot@bsc.es";
user = "jungle-robot";
# Read the password from a file, which is only readable by grafana user
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
host = "mail.bsc.es:465";
startTLS_policy = "NoStartTLS";
};
feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
};
};
services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
listenAddress = "127.0.0.1";
};
# We need access to the devices to monitor the disk space
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
# Credentials for IPMI exporter
age.secrets.ipmiYml = {
file = ../../secrets/ipmi.yml.age;
owner = "ipmi-exporter";
};
# Create an IPMI group and assign the ipmi0 device
users.groups.ipmi = {};
services.udev.extraRules = ''
SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660"
'';
# Add a new ipmi-exporter user that can read the ipmi0 device
users.users.ipmi-exporter = {
isSystemUser = true;
group = "ipmi";
};
# Disable dynamic user so we have the ipmi-exporter user available for the credentials
systemd.services.prometheus-ipmi-exporter.serviceConfig = {
DynamicUser = lib.mkForce false;
PrivateDevices = lib.mkForce false;
User = lib.mkForce "ipmi-exporter";
Group = lib.mkForce "ipmi";
RestrictNamespaces = lib.mkForce false;
# Fake uid to 0 so it shuts up
ExecStart = let
cfg = config.services.prometheus.exporters.ipmi;
in lib.mkForce (lib.concatStringsSep " " ([
"${pkgs.util-linux}/bin/unshare --map-user 0"
"${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter"
"--web.listen-address ${cfg.listenAddress}:${toString cfg.port}"
"--config.file ${lib.escapeShellArg cfg.configFile}"
] ++ cfg.extraFlags));
};
services.prometheus = {
exporters = {
ipmi = {
enable = true;
configFile = config.age.secrets.ipmiYml.path;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "logind" ];
port = 9002;
listenAddress = "127.0.0.1";
};
blackbox = {
enable = true;
listenAddress = "127.0.0.1";
configFile = ./blackbox.yml;
};
};
scrapeConfigs = [
{
job_name = "local";
static_configs = [{
targets = [
"127.0.0.1:9002" # Node exporter
#"127.0.0.1:9115" # Blackbox exporter
"127.0.0.1:9290" # IPMI exporter for local node
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
];
}];
}
{
job_name = "blackbox-http";
metrics_path = "/probe";
params = { module = [ "http_2xx" ]; };
static_configs = [{
targets = [
"https://www.google.com/robots.txt"
"https://pm.bsc.es/"
"https://pm.bsc.es/gitlab/"
"https://jungle.bsc.es/"
"https://gitlab.bsc.es/"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "blackbox-icmp";
metrics_path = "/probe";
params = { module = [ "icmp" ]; };
static_configs = [{
targets = [
"1.1.1.1"
"8.8.8.8"
"ssfhead"
"raccoon"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"fox-ipmi.ac.upc.edu"
"arenys5.ac.upc.edu"
"arenys0-2.ac.upc.edu"
"epi01.bsc.es"
"axle.bsc.es"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "ipmi-raccoon";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "raccoon-ipmi" ];
module = [ "raccoon" ];
};
}
{
job_name = "ipmi-fox";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "fox-ipmi.ac.upc.edu" ];
module = [ "fox" ];
};
}
];
};
}

View File

@@ -1,74 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = theFlake;
buildInputs = [ pkgs.hugo ];
buildPhase = ''
cd web
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
client_max_body_size 64M;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /vault/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /var/lib/p/;
}
'';
};
};
}

View File

@@ -1,16 +0,0 @@
{ config, ... }:
{
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
services.nix-serve = {
enable = true;
# Only listen locally, as we serve it via ssh
bindAddress = "127.0.0.1";
port = 5000;
secretKeyFile = config.age.secrets.nixServe.path;
# Public key:
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
};
}

View File

@@ -1,32 +0,0 @@
{ lib, ... }:
{
imports = [
../common/ssf.nix
];
# Select this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
# Users with sudo access
users.groups.wheel.members = [ "abonerib" "anavarro" ];
# Run julia installed with juliaup using julia's own libraries:
# NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
programs.nix-ld.enable = true;
networking = {
hostName = "weasel";
interfaces.eno1.ipv4.addresses = [ {
address = "10.0.40.6";
prefixLength = 24;
} ];
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.6";
prefixLength = 24;
} ];
};
}

View File

@@ -1,12 +0,0 @@
HOSTCXX ?= g++
NVCC := nvcc -ccbin $(HOSTCXX)
CXXFLAGS := -m64
# Target rules
all: cudainfo
cudainfo: cudainfo.cpp
$(NVCC) $(CXXFLAGS) -o $@ $<
clean:
rm -f cudainfo cudainfo.o

View File

@@ -1,600 +0,0 @@
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
// Shared Utilities (QA Testing)
// std::system includes
#include <memory>
#include <iostream>
#include <cuda_runtime.h>
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
static const char *_cudaGetErrorEnum(cudaError_t error)
{
switch (error)
{
case cudaSuccess:
return "cudaSuccess";
case cudaErrorMissingConfiguration:
return "cudaErrorMissingConfiguration";
case cudaErrorMemoryAllocation:
return "cudaErrorMemoryAllocation";
case cudaErrorInitializationError:
return "cudaErrorInitializationError";
case cudaErrorLaunchFailure:
return "cudaErrorLaunchFailure";
case cudaErrorPriorLaunchFailure:
return "cudaErrorPriorLaunchFailure";
case cudaErrorLaunchTimeout:
return "cudaErrorLaunchTimeout";
case cudaErrorLaunchOutOfResources:
return "cudaErrorLaunchOutOfResources";
case cudaErrorInvalidDeviceFunction:
return "cudaErrorInvalidDeviceFunction";
case cudaErrorInvalidConfiguration:
return "cudaErrorInvalidConfiguration";
case cudaErrorInvalidDevice:
return "cudaErrorInvalidDevice";
case cudaErrorInvalidValue:
return "cudaErrorInvalidValue";
case cudaErrorInvalidPitchValue:
return "cudaErrorInvalidPitchValue";
case cudaErrorInvalidSymbol:
return "cudaErrorInvalidSymbol";
case cudaErrorMapBufferObjectFailed:
return "cudaErrorMapBufferObjectFailed";
case cudaErrorUnmapBufferObjectFailed:
return "cudaErrorUnmapBufferObjectFailed";
case cudaErrorInvalidHostPointer:
return "cudaErrorInvalidHostPointer";
case cudaErrorInvalidDevicePointer:
return "cudaErrorInvalidDevicePointer";
case cudaErrorInvalidTexture:
return "cudaErrorInvalidTexture";
case cudaErrorInvalidTextureBinding:
return "cudaErrorInvalidTextureBinding";
case cudaErrorInvalidChannelDescriptor:
return "cudaErrorInvalidChannelDescriptor";
case cudaErrorInvalidMemcpyDirection:
return "cudaErrorInvalidMemcpyDirection";
case cudaErrorAddressOfConstant:
return "cudaErrorAddressOfConstant";
case cudaErrorTextureFetchFailed:
return "cudaErrorTextureFetchFailed";
case cudaErrorTextureNotBound:
return "cudaErrorTextureNotBound";
case cudaErrorSynchronizationError:
return "cudaErrorSynchronizationError";
case cudaErrorInvalidFilterSetting:
return "cudaErrorInvalidFilterSetting";
case cudaErrorInvalidNormSetting:
return "cudaErrorInvalidNormSetting";
case cudaErrorMixedDeviceExecution:
return "cudaErrorMixedDeviceExecution";
case cudaErrorCudartUnloading:
return "cudaErrorCudartUnloading";
case cudaErrorUnknown:
return "cudaErrorUnknown";
case cudaErrorNotYetImplemented:
return "cudaErrorNotYetImplemented";
case cudaErrorMemoryValueTooLarge:
return "cudaErrorMemoryValueTooLarge";
case cudaErrorInvalidResourceHandle:
return "cudaErrorInvalidResourceHandle";
case cudaErrorNotReady:
return "cudaErrorNotReady";
case cudaErrorInsufficientDriver:
return "cudaErrorInsufficientDriver";
case cudaErrorSetOnActiveProcess:
return "cudaErrorSetOnActiveProcess";
case cudaErrorInvalidSurface:
return "cudaErrorInvalidSurface";
case cudaErrorNoDevice:
return "cudaErrorNoDevice";
case cudaErrorECCUncorrectable:
return "cudaErrorECCUncorrectable";
case cudaErrorSharedObjectSymbolNotFound:
return "cudaErrorSharedObjectSymbolNotFound";
case cudaErrorSharedObjectInitFailed:
return "cudaErrorSharedObjectInitFailed";
case cudaErrorUnsupportedLimit:
return "cudaErrorUnsupportedLimit";
case cudaErrorDuplicateVariableName:
return "cudaErrorDuplicateVariableName";
case cudaErrorDuplicateTextureName:
return "cudaErrorDuplicateTextureName";
case cudaErrorDuplicateSurfaceName:
return "cudaErrorDuplicateSurfaceName";
case cudaErrorDevicesUnavailable:
return "cudaErrorDevicesUnavailable";
case cudaErrorInvalidKernelImage:
return "cudaErrorInvalidKernelImage";
case cudaErrorNoKernelImageForDevice:
return "cudaErrorNoKernelImageForDevice";
case cudaErrorIncompatibleDriverContext:
return "cudaErrorIncompatibleDriverContext";
case cudaErrorPeerAccessAlreadyEnabled:
return "cudaErrorPeerAccessAlreadyEnabled";
case cudaErrorPeerAccessNotEnabled:
return "cudaErrorPeerAccessNotEnabled";
case cudaErrorDeviceAlreadyInUse:
return "cudaErrorDeviceAlreadyInUse";
case cudaErrorProfilerDisabled:
return "cudaErrorProfilerDisabled";
case cudaErrorProfilerNotInitialized:
return "cudaErrorProfilerNotInitialized";
case cudaErrorProfilerAlreadyStarted:
return "cudaErrorProfilerAlreadyStarted";
case cudaErrorProfilerAlreadyStopped:
return "cudaErrorProfilerAlreadyStopped";
/* Since CUDA 4.0*/
case cudaErrorAssert:
return "cudaErrorAssert";
case cudaErrorTooManyPeers:
return "cudaErrorTooManyPeers";
case cudaErrorHostMemoryAlreadyRegistered:
return "cudaErrorHostMemoryAlreadyRegistered";
case cudaErrorHostMemoryNotRegistered:
return "cudaErrorHostMemoryNotRegistered";
/* Since CUDA 5.0 */
case cudaErrorOperatingSystem:
return "cudaErrorOperatingSystem";
case cudaErrorPeerAccessUnsupported:
return "cudaErrorPeerAccessUnsupported";
case cudaErrorLaunchMaxDepthExceeded:
return "cudaErrorLaunchMaxDepthExceeded";
case cudaErrorLaunchFileScopedTex:
return "cudaErrorLaunchFileScopedTex";
case cudaErrorLaunchFileScopedSurf:
return "cudaErrorLaunchFileScopedSurf";
case cudaErrorSyncDepthExceeded:
return "cudaErrorSyncDepthExceeded";
case cudaErrorLaunchPendingCountExceeded:
return "cudaErrorLaunchPendingCountExceeded";
case cudaErrorNotPermitted:
return "cudaErrorNotPermitted";
case cudaErrorNotSupported:
return "cudaErrorNotSupported";
/* Since CUDA 6.0 */
case cudaErrorHardwareStackError:
return "cudaErrorHardwareStackError";
case cudaErrorIllegalInstruction:
return "cudaErrorIllegalInstruction";
case cudaErrorMisalignedAddress:
return "cudaErrorMisalignedAddress";
case cudaErrorInvalidAddressSpace:
return "cudaErrorInvalidAddressSpace";
case cudaErrorInvalidPc:
return "cudaErrorInvalidPc";
case cudaErrorIllegalAddress:
return "cudaErrorIllegalAddress";
/* Since CUDA 6.5*/
case cudaErrorInvalidPtx:
return "cudaErrorInvalidPtx";
case cudaErrorInvalidGraphicsContext:
return "cudaErrorInvalidGraphicsContext";
case cudaErrorStartupFailure:
return "cudaErrorStartupFailure";
case cudaErrorApiFailureBase:
return "cudaErrorApiFailureBase";
}
return "<unknown>";
}
#endif
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
cudaDeviceReset();
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
int *pArgc = NULL;
char **pArgv = NULL;
#if CUDART_VERSION < 5000
// CUDA-C includes
#include <cuda.h>
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
if (CUDA_SUCCESS != error) {
fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
error, __FILE__, __LINE__);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(EXIT_FAILURE);
}
}
#endif /* CUDART_VERSION < 5000 */
// Beginning of GPU Architecture definitions
inline int ConvertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
pArgc = &argc;
pArgv = argv;
printf("%s Starting...\n\n", argv[0]);
printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
int deviceCount = 0;
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
if (error_id != cudaSuccess) {
printf("cudaGetDeviceCount failed: %s (%d)\n",
cudaGetErrorString(error_id), (int) error_id);
printf("Result = FAIL\n");
exit(EXIT_FAILURE);
}
// This function call returns 0 if there are no CUDA capable devices.
if (deviceCount == 0)
printf("There are no available device(s) that support CUDA\n");
else
printf("Detected %d CUDA Capable device(s)\n", deviceCount);
int dev, driverVersion = 0, runtimeVersion = 0;
for (dev = 0; dev < deviceCount; ++dev) {
cudaSetDevice(dev);
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
// Console log
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
(float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
deviceProp.multiProcessorCount,
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 5000
// This is supported in CUDA 5.0 (runtime API device properties)
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
if (deviceProp.l2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
}
#else
// This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
int memoryClock;
getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
int memBusWidth;
getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
int L2CacheSize;
getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
if (L2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
}
#endif
printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
printf(" Warp size: %d\n", deviceProp.warpSize);
printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
#endif
printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
const char *sComputeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
"Unknown",
NULL
};
printf(" Compute Mode:\n");
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
}
// If there are 2 or more GPUs, query to determine whether RDMA is supported
if (deviceCount >= 2)
{
cudaDeviceProp prop[64];
int gpuid[64]; // we want to find the first two GPU's that can support P2P
int gpu_p2p_count = 0;
for (int i=0; i < deviceCount; i++)
{
checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
// Only boards based on Fermi or later can support P2P
if ((prop[i].major >= 2)
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
&& prop[i].tccDriver
#endif
)
{
// This is an array of P2P capable GPUs
gpuid[gpu_p2p_count++] = i;
}
}
// Show all the combinations of support P2P GPUs
int can_access_peer_0_1, can_access_peer_1_0;
if (gpu_p2p_count >= 2)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
for (int j = 1; j < gpu_p2p_count; j++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
prop[gpuid[j]].name, gpuid[j] ,
can_access_peer_0_1 ? "Yes" : "No");
}
}
for (int j = 1; j < gpu_p2p_count; j++)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
prop[gpuid[i]].name, gpuid[i] ,
can_access_peer_1_0 ? "Yes" : "No");
}
}
}
}
// csv masterlog info
// *****************************
// exe and CUDA driver name
printf("\n");
std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
char cTemp[128];
// driver version
sProfileString += ", CUDA Driver Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#endif
sProfileString += cTemp;
// Runtime version
sProfileString += ", CUDA Runtime Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#endif
sProfileString += cTemp;
// Device count
sProfileString += ", NumDevs = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d", deviceCount);
#else
sprintf(cTemp, "%d", deviceCount);
#endif
sProfileString += cTemp;
// Print Out all device Names
for (dev = 0; dev < deviceCount; ++dev)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 13, ", Device%d = ", dev);
#else
sprintf(cTemp, ", Device%d = ", dev);
#endif
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
sProfileString += cTemp;
sProfileString += deviceProp.name;
}
sProfileString += "\n";
printf("%s", sProfileString.c_str());
printf("Result = PASS\n");
// finish
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
return 0;
}

View File

@@ -1,43 +0,0 @@
{
stdenv
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
, strace
}:
stdenv.mkDerivation (finalAttrs: {
name = "cudainfo";
src = ./.;
buildInputs = [
cudatoolkit # Required for nvcc
cudaPackages.cuda_cudart.static # Required for -lcudart_static
autoAddDriverRunpath
];
installPhase = ''
mkdir -p $out/bin
cp -a cudainfo $out/bin
'';
passthru.gpuCheck = stdenv.mkDerivation {
name = "cudainfo-test";
requiredSystemFeatures = [ "cuda" ];
dontBuild = true;
nativeCheckInputs = [
finalAttrs.finalPackage # The cudainfo package from above
strace # When it fails, it will show the trace
];
dontUnpack = true;
doCheck = true;
checkPhase = ''
if ! cudainfo; then
set -x
cudainfo=$(command -v cudainfo)
ldd $cudainfo
readelf -d $cudainfo
strace -f $cudainfo
set +x
fi
'';
installPhase = "touch $out";
};
})

View File

@@ -1,25 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "meteocat-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
beautifulsoup4
lxml
prometheus-client
];
meta = with lib; {
description = "MeteoCat Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
from bs4 import BeautifulSoup
from urllib import request
# Configuration -------------------------------------------
meteo_station = "X8" # Barcelona - Zona Universitària
listening_port = 9929
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
def update(st):
url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
response = request.urlopen(url)
data = response.read()
soup = BeautifulSoup(data, 'lxml')
table = soup.find("table", {"class" : "tblperiode"})
rows = table.find_all('tr')
row = rows[-1] # Take the last row
row_data = []
header = row.find('th')
header_text = header.text.strip()
row_data.append(header_text)
for col in row.find_all('td'):
row_data.append(col.text)
try:
# Sometimes it will return '(s/d)' and fail to parse
metric_tavg.set(float(row_data[1]))
metric_tmax.set(float(row_data[2]))
metric_tmin.set(float(row_data[3]))
metric_srad.set(float(row_data[10]))
#print("ok: temp_avg={}".format(float(row_data[1])))
except:
print("cannot parse row: {}".format(row))
metric_tavg.set(float("nan"))
metric_tmax.set(float("nan"))
metric_tmin.set(float("nan"))
metric_srad.set(float("nan"))
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
update(meteo_station)
except:
print("update failed")
time.sleep(update_period)

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='meteocat-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["meteocat-exporter"],
)

View File

@@ -0,0 +1,36 @@
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
index 33e88bc..ee3641c 100644
--- a/src/util/mpir_hwtopo.c
+++ b/src/util/mpir_hwtopo.c
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
#ifdef HAVE_HWLOC
bindset = hwloc_bitmap_alloc();
hwloc_topology_init(&hwloc_topology);
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
- if (xmlfile != NULL) {
- int rc;
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
- if (rc == 0) {
- /* To have hwloc still actually call OS-specific hooks, the
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
- * file is really the underlying system. */
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
- }
- MPL_free(xmlfile);
- }
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
if (!hwloc_topology_load(hwloc_topology))
--- a/src/mpi/init/local_proc_attrs.c
+++ b/src/mpi/init/local_proc_attrs.c
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
/* Set the number of tag bits. The device may override this value. */
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
- MPL_free(requested_kinds);
-
return mpi_errno;
}

View File

@@ -11,6 +11,10 @@ final: prev:
paths = [ pmix.dev pmix.out ]; paths = [ pmix.dev pmix.out ];
}; };
in prev.mpich.overrideAttrs (old: { in prev.mpich.overrideAttrs (old: {
patches = (old.patches or []) ++ [
# See https://github.com/pmodels/mpich/issues/6946
./mpich-fix-hwtopo.patch
];
buildInput = old.buildInputs ++ [ buildInput = old.buildInputs ++ [
libfabric libfabric
pmixAll pmixAll
@@ -50,7 +54,4 @@ final: prev:
}); });
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
cudainfo = prev.callPackage ./cudainfo/default.nix { };
} }

View File

@@ -1,24 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "upc-qaire-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
prometheus-client
requests
];
meta = with lib; {
description = "UPC Qaire Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='upc-qaire-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["upc-qaire-exporter"],
)

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
import requests, json
from datetime import datetime, timedelta
# Configuration -------------------------------------------
listening_port = 9928
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
def genparams():
d = {}
d['topic'] = 'TEMPERATURE'
d['shift_dates_to'] = ''
d['datapoints'] = 301
d['devicesAndColors'] = '1148418@@@#40ACB6'
now = datetime.now()
d['fromDate'] = now.strftime('%d/%m/%Y')
d['toDate'] = now.strftime('%d/%m/%Y')
d['serviceFrequency'] = 'NONE'
# WTF!
for i in range(7):
for j in range(48):
key = 'week.days[{}].hours[{}].value'.format(i, j)
d[key] = 'OPEN'
return d
def measure():
# First we need to load session
s = requests.Session()
r = s.get("https://upc.edu/sirena")
if r.status_code != 200:
print("bad HTTP status code on new session: {}".format(r.status_code))
return
if s.cookies.get("JSESSIONID") is None:
print("cannot get JSESSIONID")
return
# Now we can pull the data
url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
r = s.post(url, data=genparams())
if r.status_code != 200:
print("bad HTTP status code on data: {}".format(r.status_code))
return
#print(r.text)
j = json.loads(r.content)
# Just take the last one
last = j['data']['chartElementList'][-1]
temp = last['values']['1148418-Temperatura']
return temp
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
metric_temp.set(measure())
except:
print("measure failed")
metric_temp.set(float("nan"))
time.sleep(update_period)

Binary file not shown.

View File

@@ -1,13 +1,9 @@
age-encryption.org/v1 age-encryption.org/v1
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik -> ssh-ed25519 HY2yRg eRVX5yndWDLg9hw7sY1Iu8pJFy47luHvdL+zZGK2u1s
DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI e1nXXiMW0ywkZYh2s6c7/quGMfBOJOaRhNQDjCD2Iyc
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio -> ssh-ed25519 CAWG4Q gYG7GRxRpJ0/5Wz0Z0J2wfLfkMFNmcy81dQEewM7gUA
hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU lamdUdx+xOFWF1lmUM4x9TT0cJtKu9Sp7w9JHwm13u0
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw -> ssh-ed25519 MSF3dg HEzfpR8alG6WPzhaEjAmmjOFoFcMSQUldx46dBsXri4
hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs OAD5H/zZGhfevYrFJzJrbNKPomKZDOS9Qx5tmTp78Jo
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY --- A0sMSiNXWaEIgRXR0x6UAIaluuVH6Zlv4CJ9sI0NXOw
eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM <EFBFBD><EFBFBD>6<EFBFBD>ph<EFBFBD><EFBFBD><EFBFBD>{<7B>><3E>F|<7C>i<EFBFBD>v <0B><>E}{<7B>ru<72><75>Ʒ<EFBFBD><C6B7><1A><EFBFBD><7F>}^<5E><>><3E>c6<06><14>j<> <09>g<EFBFBD>GW<47><57>:<3A>J3<19>|<7C>|<7C>Z<EFBFBD>
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>% <09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG <0B><><EFBFBD>

Binary file not shown.

View File

@@ -1,13 +1,10 @@
age-encryption.org/v1 age-encryption.org/v1
-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk -> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A
6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8
-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY -> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM
6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo
-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc -> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4
mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8
-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M --- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8
KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc <EFBFBD><13>#<23><>H<EFBFBD>$<24>F;<3B><EFBFBD><7F>%<25><>6<><02>2<EFBFBD><32>rfX<66>\Dn <20>ш<EFBFBD>ȉ<EFBFBD>x<EFBFBD><78>><3E><>&;<3B>c<EFBFBD>U<EFBFBD>I=<3D><>M<EFBFBD><4D><EFBFBD>?T<><54>Ǹ<EFBFBD><16>"px<70>ӭ\s<><73><EFBFBD>bF<62><46><EFBFBD><EFBFBD>WD<>{<7B>
-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc AW>?U<><55><EFBFBD><17><>
wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U
--- 1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>R<EFBFBD>@<40>/i<>I'<27><><EFBFBD>Nx<4E>r"<1D>`<1E>O<EFBFBD><4F><EFBFBD>y<><79>8<EFBFBD><38> \/<2F><>I<19><17>D<EFBFBD>`<60>ߓ<EFBFBD><DF93><EFBFBD><1E><04>uy<75><79><EFBFBD>:9Lt<4C><1D><><EFBFBD>؋<EFBFBD><D88B><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AU<41><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>`<60>;<3B>q8<71>GLU#<23>i<EFBFBD>y<EFBFBD><79>i<03>ڜ

View File

@@ -1,15 +1,9 @@
age-encryption.org/v1 age-encryption.org/v1
-> ssh-ed25519 HY2yRg NYGOSeZn8nGJUqpWoOAA9XO8P7eckUBKXCs8wPs+wlU -> ssh-ed25519 HY2yRg xWRxJGWSzA5aplRYCYLB6aBwrUrQQJ2MtDYaD75V5nI
oLlgaZJVLV9Im1h0vHEKPVApsh46av8ovMgNoFDKle4 J07XF3NQiaYKKKNRcNWi9MloJD2wXHd+2K7bo6lF+QU
-> ssh-ed25519 cK5kHw bC9UlQXeP5LwIFFO9oHXocqojLtSPWE/kWbhCbiSIGg -> ssh-ed25519 CAWG4Q jNWymbyCczcm8RcaIEbFQBlOMALsuxTl4+pLUi0aR20
wPGpwKpCcV09jvxVmwj6BTmjm+CZv42sdgCqSfD624Y z5NixlrRD+Y7Z/aFPs6hiDW4/lp8CBQCeJYpbuG9yYM
-> ssh-ed25519 CAWG4Q WkJgjedCBn4i4b/VFuU9Wq21VkHxiuwsla+0PuiSiD4 -> ssh-ed25519 MSF3dg QsUQloEKN3k1G49FQnNR/Do6ILgGpjFcw3zu5kk1Ako
/nnfy2DTxQkkfCqzIa+lxgqn6MIgFlN5gZHYYApePvs IHwyFWUEWqCStNcFprnpBa8L5J6zKIsn+7HcgGRv3sM
-> ssh-ed25519 xA739A 9gcn6j7c7rCR50AetiuCkAnMsSEMtQto///qlTkAWhs --- oUia0fsL6opeYWACyXtHAu/Ld+bUIt/7S1VszYTvwgU
lXrOn+cehZpRkIRzSJ1e64KsCqWf3tKa4ABbYBquvqM <EFBFBD><EFBFBD>V<EFBFBD><16>*<2A>t<1B>2-<2D>7<><37><EFBFBD><EFBFBD><EFBFBD><EFBFBD>h<EFBFBD>&<26><>͢_!տ+<2B><><EFBFBD><EFBFBD>(<28><0F><11>n<EFBFBD><6E> <09><>(<28><19><>/}<7D><><EFBFBD><EFBFBD>C<EFBFBD>Nͷ|<04>N<>u<EFBFBD>5<EFBFBD>ù勚K<E58B9A><4B>l<EFBFBD>"<22><>klOX<4F>y<EFBFBD><79><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD><41>e<><65>$
-> ssh-ed25519 MSF3dg FzrEytuzBKr+HwpC1bxev3q+6cSZoMMCJdJfuANlHwo
qVyt4YpzGfvNX6IqwXs6oRA5aSgidFFxEA22D8XPJBU
--- FHVIG8tcNJBte+3VUsR3FsOs8xqrAeboFLxOV/xvSz0
_B<5F>M<EFBFBD>j<EFBFBD>H~<7E><>
^<5E>^x<>'
<EFBFBD>X<1D> <0C><>|1d<31>:`r<> <0C><><EFBFBD>X<EFBFBD>A<EFBFBD><41>B<EFBFBD>Z<EFBFBD><5A><1C><18><><EFBFBD>|<7C>^<16>E<EFBFBD>{<7B><><EFBFBD>]<5D><>U<EFBFBD><1E><16>Ͼ<EFBFBD><CFBE><EFBFBD><EFBFBD>EЗ<45><D097><EFBFBD>83<38>V<EFBFBD><56><EFBFBD><1E>D<EFBFBD><44><EFBFBD><EFBFBD><EFBFBD>^<5E><>;:

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -2,10 +2,6 @@ let
keys = import ../keys.nix; keys = import ../keys.nix;
adminsKeys = builtins.attrValues keys.admins; adminsKeys = builtins.attrValues keys.admins;
hut = [ keys.hosts.hut ] ++ adminsKeys; hut = [ keys.hosts.hut ] ++ adminsKeys;
fox = [ keys.hosts.fox ] ++ adminsKeys;
apex = [ keys.hosts.apex ] ++ adminsKeys;
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
tent = [ keys.hosts.tent ] ++ adminsKeys;
# Only expose ceph keys to safe nodes and admins # Only expose ceph keys to safe nodes and admins
safe = keys.hostGroup.safe ++ adminsKeys; safe = keys.hostGroup.safe ++ adminsKeys;
in in
@@ -14,19 +10,10 @@ in
"gitlab-runner-docker-token.age".publicKeys = hut; "gitlab-runner-docker-token.age".publicKeys = hut;
"gitlab-runner-shell-token.age".publicKeys = hut; "gitlab-runner-shell-token.age".publicKeys = hut;
"gitlab-bsc-docker-token.age".publicKeys = hut; "gitlab-bsc-docker-token.age".publicKeys = hut;
"nix-serve.age".publicKeys = mon; "nix-serve.age".publicKeys = hut;
"jungle-robot-password.age".publicKeys = mon; "jungle-robot-password.age".publicKeys = hut;
"ipmi.yml.age".publicKeys = mon; "ipmi.yml.age".publicKeys = hut;
"tent-gitlab-runner-pm-docker-token.age".publicKeys = tent;
"tent-gitlab-runner-pm-shell-token.age".publicKeys = tent;
"tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent;
"vpn-dac-login.age".publicKeys = tent;
"vpn-dac-client-key.age".publicKeys = tent;
"ceph-user.age".publicKeys = safe; "ceph-user.age".publicKeys = safe;
"munge-key.age".publicKeys = safe; "munge-key.age".publicKeys = safe;
"wg-fox.age".publicKeys = fox;
"wg-apex.age".publicKeys = apex;
} }

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0
CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ
-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4
JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4
-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg
IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc
-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg
fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng
-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0
pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE
--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8
<EFBFBD>\ֺ"^<5E>DT<44>H<EFBFBD><48>3<EFBFBD><33><EFBFBD>_|.h<0E><><EFBFBD><EFBFBD><03>^<5E>n<14><0E><><EFBFBD><EFBFBD><1A>g<EFBFBD>S<EFBFBD>]_<><5F>?n<>z~2<>!<21>p7<70><37><<3C><14>ʨD?<3F>~<02>F<EFBFBD>$<24>`<60>q+<2B><><EFBFBD>SW<53>(+<2B><>P<EFBFBD>c<1E>u[<5B>m<EFBFBD>`O<>ܛ<EFBFBD>ϖT

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM
vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE
-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI
/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng
-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE
JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho
-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA
msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ
-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec
SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE
--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o
<EFBFBD><EFBFBD>f#<23>,|<7C>Ey.v<>DL<44>Ӻ<05>JPX<50><07><>`<60><><EFBFBD><EFBFBD>-#<23>F<EFBFBD>Ubs<62>(Q!?<3F><1A>#xJG?5<><35><EFBFBD><EFBFBD><EFBFBD>~<7E><>6MA<15> U<><55><EFBFBD>C<01><>M<>$+}W<>NϨG!<21><><EFBFBD><EFBFBD>a<EFBFBD><61><EFBFBD><EFBFBD>%<25>ǽ<EFBFBD>G

View File

@@ -1,13 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>

Binary file not shown.

View File

@@ -1,14 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 G5LX5w SRJhNenoQXbT1FgX3TMPnVH5P6oe2eHot+M1YsEjsEk
hfTSLgKi98Eh7JK5o7x2POpTEtQlQCpEa3keUFYCuME
-> ssh-ed25519 cK5kHw z5TwWJTkvx7HztjXHJW/aCOtOfPrQaLP0gyIT7rXcyU
b4NCpHfasgvkLLr+6LcWUl60p59aSNnfp3bl2OFYXo0
-> ssh-ed25519 CAWG4Q 4VpS1/OnFe8nxcQbRTKNhjsh/ZQ5cbhSMXwK/jjQ+3o
WF9wvOkqVml4UcEzyzeumKuUwCwwr2zvKLMg+PCB8nk
-> ssh-ed25519 xA739A 67FhuJ070jBVMt/xbKHWhfri6iIm0FyaFvzQabsvFBM
1G5/913dDv/r/6p1x/c5YiUnZzrX/LvIj33KW+PN0KU
-> ssh-ed25519 MSF3dg Bj/yB4N2wkyHCHC22tcjjJAA4ebSamN0Z4UVX3ZnryI
6D/ZgTs+j+MGDAbPU5zyK0i9zN6tQy68IcOnQZ27mYg
--- 169erk3ICSYLs4FPEuXCn7QlekWhsmSn0Lr+/R14I5Q
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><05>ҽ3<D2BD>s<EFBFBD>
w<EFBFBD><EFBFBD>4D<EFBFBD><EFBFBD>b.<2E><><EFBFBD>"|<7C><><EFBFBD>)"<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>;<3B>.<2E>ɫ7)<29>LeC<05>=S؟

Binary file not shown.

View File

@@ -1,14 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 cDBabA heyW9/cxgwFX9IexQIXjAQDWGQPNcMXcArQp2Rxsqx4
o9MQ7EH8PDDjsJdpH9F3Xq2zUoaDAJQlfFmYucSFs6Y
-> ssh-ed25519 cK5kHw Sza4pos7K3qW3omEeyidI/jszJNf9smemSZnUJfCIww
D6vazXki7hIYraIuSiGPS+FPbkFUwHhHWDf52OhEIMg
-> ssh-ed25519 CAWG4Q YexIHueOIMmIN8JIDyNUOKBkyz/k18HqV3hTXh48KlM
xh8UJzzWT6ByN+Dpn4JrMNsjGC/uc/v6LynwjBDz9NQ
-> ssh-ed25519 xA739A KySG3TXdqfCMUkVEDGa74B0op745s3XGYxFLyAXSQAc
5EI/yb5ctW9Qu18bHm3/sK97kwGcKzzmWvPSCWm89XA
-> ssh-ed25519 MSF3dg MNxnNj0fHmri8ophexXPNjRUBUWrzcuk5S1mucxUMTE
GVFWXtISEU8ZmlwL4nh4weAgfGrt2GHX0DTzbpS6zg8
--- UdrqkYG2ZApAuwdZeNhC50NP2rkD/Ol6y8nJa4RHx7Y
<EFBFBD>ܻ<EFBFBD>m(<28><><EFBFBD>><3E>H<48>Y87<><37>G<0F>+*<12><><EFBFBD><EFBFBD>9V<>.<2E><><EFBFBD><EFBFBD><03><><EFBFBD>p<EFBFBD>Oo<4F>=+哇<>P0<50><30>{<7B>)<29><17><><EFBFBD><EFBFBD>><3E>z3P^
u

View File

@@ -5,12 +5,18 @@ description: "Request access to the machines"
![Cave](./cave.jpg) ![Cave](./cave.jpg)
To request access to the machines we will need some information: Before requesting access to the jungle machines, you must be able to access the
`ssfhead.bsc.es` node (only available via the intranet or VPN). You can request
access to the login machine using a resource petition in the BSC intranet.
Then, to request access to the machines we will need some information about you:
1. Which machines you want access to ([hut](/hut), [fox](/fox), owl1, owl2, eudy, koro...) 1. Which machines you want access to ([hut](/hut), [fox](/fox), owl1, owl2, eudy, koro...)
1. Your user name (make sure it matches the one you use for the BSC intranet) 1. Your user name and user id (to match the NFS permissions)
1. Your real name and surname (for identification purposes) 1. Your real name and surname (for identification purposes)
1. The salted hash of your login password, generated with `mkpasswd -m sha-512` 1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`) 1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`)
Send an email to <jungle@bsc.es> with the details. Send an email to <jungle@bsc.es> with the details, or directly open a
merge request in the [jungle
repository](https://pm.bsc.es/gitlab/rarias/jungle/).

View File

@@ -21,28 +21,17 @@ the detailed specifications:
## Access ## Access
To access the machine, request a SLURM session from [apex](/apex) using the `fox` To access the machine, request a SLURM session from [hut](/hut) using the `fox`
partition. If you need the machine for performance measurements, use an partition:
exclusive reservation:
apex% salloc -p fox --exclusive hut% salloc -p fox
Otherwise, specify the CPUs that you need so other users can also use the node Then connect via ssh:
at the same time:
apex% salloc -p fox -c 8 hut% ssh fox
Then use srun to execute an interactive shell:
apex% srun --pty $SHELL
fox% fox%
Make sure you get all CPUs you expect: Follow [these steps](/access) if you don't have access to hut or fox.
fox% grep Cpus_allowed_list /proc/self/status
Cpus_allowed_list: 0-191
Follow [these steps](/access) if you don't have access to apex or fox.
## CUDA ## CUDA
@@ -100,8 +89,9 @@ Then just run `nix develop` from the same directory:
The machine has several file systems available. The machine has several file systems available.
- `/nfs/home`: The `/home` from apex via NFS, which is also shared with other - `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity.
xeon machines. It has about 2 ms of latency, so not suitable for quick random Don't abuse.
access. - `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high
capacity. Stores three redundant copies of every file.
- `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity. - `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity.
- `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot. - `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot.

View File

@@ -13,115 +13,6 @@ which is available at `hut` or `xeon07`. It runs the following services:
- Grafana: to plot the data in the web browser. - Grafana: to plot the data in the web browser.
- Slurmctld: to manage the SLURM nodes. - Slurmctld: to manage the SLURM nodes.
- Gitlab runner: to run CI jobs from Gitlab. - Gitlab runner: to run CI jobs from Gitlab.
- Nix binary cache: to serve cached nix builds
This node is prone to interruptions from all the services it runs, so it is not This node is prone to interruptions from all the services it runs, so it is not
a good candidate for low noise executions. a good candidate for low noise executions.
# Binary cache
We provide a binary cache in `hut`, with the aim of avoiding unnecessary
recompilation of packages.
The cache should contain common packages from bscpkgs, but we don't provide
any guarantee that of what will be available in the cache, or for how long.
We recommend following the latest version of the `jungle` flake to avoid cache
misses.
## Usage
### From NixOS
In NixOS, we can add the cache through the `nix.settings` option, which will
enable it for all builds in the system.
```nix
{ ... }: {
nix.settings = {
extra-substituters = [ "https://jungle.bsc.es/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
}
```
### Interactively
The cache can also be specified in a per-command basis through the flags
`--substituters` and `--trusted-public-keys`:
```sh
nix build --substituters "https://jungle.bsc.es/cache" --trusted-public-keys "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" <...>
```
Note: you'll have to be a trusted user.
### Nix configuration file (non-nixos)
If using nix outside of NixOS, you'll have to update `/etc/nix/nix.conf`
```
# echo "extra-substituters = https://jungle.bsc.es/cache" >> /etc/nix/nix.conf
# echo "extra-trusted-public-keys = jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" >> /etc/nix/nix.conf
```
### Hint in flakes
By adding the configuration below to a `flake.nix`, when someone uses the flake,
`nix` will interactively ask to trust and use the provided binary cache:
```nix
{
nixConfig = {
extra-substituters = [
"https://jungle.bsc.es/cache"
];
extra-trusted-public-keys = [
"jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0="
];
};
outputs = { ... }: {
...
};
}
```
### Querying the cache
Check if the cache is available:
```sh
$ curl https://jungle.bsc.es/cache/nix-cache-info
StoreDir: /nix/store
WantMassQuery: 1
Priority: 30
```
Prevent nix from building locally:
```bash
nix build --max-jobs 0 <...>
```
Check if a package is in cache:
```bash
# Do a raw eval on the <package>.outPath (this should not build the package)
$ nix eval --raw jungle#openmp.outPath
/nix/store/dwnn4dgm1m4184l4xbi0qfrprji9wjmi-openmp-2024.11
# Take the hash (everything from / to - in the basename) and curl <hash>.narinfo
# if it exists in the cache, it will return HTTP 200 and some information
# if not, it will return 404
$ curl https://jungle.bsc.es/cache/dwnn4dgm1m4184l4xbi0qfrprji9wjmi.narinfo
StorePath: /nix/store/dwnn4dgm1m4184l4xbi0qfrprji9wjmi-openmp-2024.11
URL: nar/dwnn4dgm1m4184l4xbi0qfrprji9wjmi-17imkdfqzmnb013d14dx234bx17bnvws8baf3ii1xra5qi2y1wiz.nar
Compression: none
NarHash: sha256:17imkdfqzmnb013d14dx234bx17bnvws8baf3ii1xra5qi2y1wiz
NarSize: 1519328
References: 4gk773fqcsv4fh2rfkhs9bgfih86fdq8-gcc-13.3.0-lib nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36
Deriver: vcn0x8hikc4mvxdkvrdxp61bwa5r7lr6-openmp-2024.11.drv
Sig: jungle.bsc.es:GDTOUEs1jl91wpLbb+gcKsAZjpKdARO9j5IQqb3micBeqzX2M/NDtKvgCS1YyiudOUdcjwa3j+hyzV2njokcCA==
# In oneline:
$ curl "https://jungle.bsc.es/cache/$(nix eval --raw jungle#<package>.outPath | cut -d '/' -f4 | cut -d '-' -f1).narinfo"
```
#### References
- https://nix.dev/guides/recipes/add-binary-cache.html
- https://nixos.wiki/wiki/Binary_Cache

View File

@@ -5,13 +5,13 @@ author: "Rodrigo Arias Mallo"
date: 2024-09-20 date: 2024-09-20
--- ---
The tent machine provides a paste service using the program `p` (as in paste). The hut machine provides a paste service using the program `p` (as in paste).
You can use it directly from the tent machine or remotely if you have [SSH You can use it directly from the hut machine or remotely if you have [SSH
access](/access) to tent using the following alias: access](/access) to hut using the following alias:
``` ```
alias p="ssh tent p" alias p="ssh hut p"
``` ```
You can add it to bashrc or zshrc for persistent installation. You can add it to bashrc or zshrc for persistent installation.
@@ -19,7 +19,7 @@ You can add it to bashrc or zshrc for persistent installation.
## Usage ## Usage
The `p` command reads from the standard input, uploads the content to a file The `p` command reads from the standard input, uploads the content to a file
in the local filesystem and prints the URL to access it. It only accepts an in the ceph filesystem and prints the URL to access it. It only accepts an
optional argument, which is the extension of the file that will be stored on optional argument, which is the extension of the file that will be stored on
disk (without the dot). By default it uses the `txt` extension, so plain text disk (without the dot). By default it uses the `txt` extension, so plain text
can be read in the browser directly. can be read in the browser directly.
@@ -28,21 +28,21 @@ can be read in the browser directly.
p [extension] p [extension]
``` ```
To remove files, go to `/var/lib/p/$USER` and remove them manually. To remove files, go to `/ceph/p/$USER` and remove them manually.
## Examples ## Examples
Share a text file, in this case the source of p itself: Share a text file, in this case the source of p itself:
``` ```
tent% p < m/tent/p.nix hut% p < m/hut/p.nix
https://jungle.bsc.es/p/rarias/okbtG130.txt https://jungle.bsc.es/p/rarias/okbtG130.txt
``` ```
Paste the last dmesg lines directly from a pipe: Paste the last dmesg lines directly from a pipe:
``` ```
tent% dmesg | tail -5 | p hut% dmesg | tail -5 | p
https://jungle.bsc.es/p/rarias/luX4STm9.txt https://jungle.bsc.es/p/rarias/luX4STm9.txt
``` ```