19 Commits

Author SHA1 Message Date
8161a21fbc users: Create tracing group and add arocanon 2025-05-15 12:32:54 +02:00
a2b9b155f6 raccoon: Add lttng and extend perf support 2025-05-15 12:21:26 +02:00
00e686a1d8 raccoon: Enable nixdebuginfod 2025-05-06 14:44:12 +02:00
f00575640c Set keep-outputs to true in all machines
From the documentation of keep-outputs, setting it to true would prevent
the GC from removing build time dependencies:

If true, the garbage collector will keep the outputs of non-garbage
derivations. If false (default), outputs will be deleted unless they are
GC roots themselves (or reachable from other roots).

In general, outputs must be registered as roots separately. However,
even if the output of a derivation is registered as a root, the
collector will still delete store paths that are used only at build time
(e.g., the C compiler, or source tarballs downloaded from the network).
To prevent it from doing so, set this option to true.

See: https://nix.dev/manual/nix/2.24/command-ref/conf-file.html#conf-keep-outputs
Reviewed-by: Aleix Roca Nonell <aleix.rocanonell@bsc.es>
2025-05-05 11:46:46 +02:00
5899d8847c Use nix cache from hut 2025-05-05 11:15:18 +02:00
2be8c0dad3 Make raccoon use performance governor 2025-05-05 11:15:14 +02:00
cf4ce9fa18 Enable binfmt emulation in raccoon 2025-03-21 17:51:41 +01:00
b513cd8982 Disable nix garbage collector 2025-03-18 16:48:47 +01:00
b87d79e1cc Add dbautist user to raccoon machine 2025-03-03 13:55:23 +01:00
4807337098 Add node exporter monitoring in raccoon 2025-02-25 17:11:09 +01:00
82ed40d386 Add FPGA u280 firmware (working ok) 2025-02-25 17:08:13 +01:00
e81b7cc158 Add U280 support back in the xocl driver
They removed U280 from the list of supported devices, but that doesn't
mean it is will stop working.
2025-02-21 10:30:42 +01:00
0533a48a98 Add udev rules 2025-02-20 17:32:31 +01:00
a9c1cb26ea Load Xilinx modules at boot 2025-02-20 16:27:14 +01:00
4ab38212b3 Add Xilinx xocl derivation
Need to move to raccoon to access "config" attribute.
2025-02-20 16:04:52 +01:00
df8b5b2d67 First successful build of Xilinx XRT
What an absolute nightmare, and we are far from the end.
2025-02-20 12:07:21 +01:00
c5400955c7 Allow X11 forwarding via SSH 2025-02-18 16:19:04 +01:00
165a60b0d2 Enable linger for user rarias
Allows services to run without a login session.
2024-10-14 19:12:25 +02:00
bd4dabf03a Only proxy SSH git remotes via hut in xeon
Other machines like raccoon have direct access.
2024-10-14 18:31:04 +02:00
123 changed files with 1356 additions and 4044 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,2 @@
*.swp
/result
/misc

View File

@@ -1,46 +0,0 @@
#!/bin/sh
# Trims the jungle repository by moving the website to its own repository and
# removing it from jungle. It also removes big pdf files and kernel
# configurations so the jungle repository is small.
set -e
if [ -e oldjungle -o -e newjungle -o -e website ]; then
echo "remove oldjungle/, newjungle/ and website/ first"
exit 1
fi
# Clone the old jungle repo
git clone gitea@tent:rarias/jungle.git oldjungle
# First split the website into a new repository
mkdir website && git -C website init -b master
git-filter-repo \
--path web \
--subdirectory-filter web \
--source oldjungle \
--target website
# Then remove the website, pdf files and big kernel configs
mkdir newjungle && git -C newjungle init -b master
git-filter-repo \
--invert-paths \
--path web \
--path-glob 'doc*.pdf' \
--path-glob '**/kernel/configs/lockdep' \
--path-glob '**/kernel/configs/defconfig' \
--source oldjungle \
--target newjungle
set -x
du -sh oldjungle newjungle website
# 57M oldjungle
# 2,3M newjungle
# 6,4M website
du -sh --exclude=.git oldjungle newjungle website
# 30M oldjungle
# 700K newjungle
# 3,5M website

34
flake.lock generated
View File

@@ -10,11 +10,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1750173260,
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
"lastModified": 1720546205,
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
"owner": "ryantm",
"repo": "agenix",
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
"type": "github"
},
"original": {
@@ -30,11 +30,11 @@
]
},
"locked": {
"lastModified": 1749650500,
"narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=",
"lastModified": 1713974364,
"narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=",
"ref": "refs/heads/master",
"rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405",
"revCount": 961,
"rev": "de89197a4a7b162db7df9d41c9d07759d87c5709",
"revCount": 937,
"type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs"
},
@@ -51,11 +51,11 @@
]
},
"locked": {
"lastModified": 1744478979,
"narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=",
"lastModified": 1700795494,
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
"owner": "lnl7",
"repo": "nix-darwin",
"rev": "43975d782b418ebf4969e9ccba82466728c2851b",
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
"type": "github"
},
"original": {
@@ -73,11 +73,11 @@
]
},
"locked": {
"lastModified": 1745494811,
"narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=",
"lastModified": 1703113217,
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be",
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
"type": "github"
},
"original": {
@@ -88,16 +88,16 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1752436162,
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
"lastModified": 1720957393,
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.05",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}

View File

@@ -1,6 +1,6 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
agenix.url = "github:ryantm/agenix";
agenix.inputs.nixpkgs.follows = "nixpkgs";
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
@@ -18,7 +18,6 @@ in
{
nixosConfigurations = {
hut = mkConf "hut";
tent = mkConf "tent";
owl1 = mkConf "owl1";
owl2 = mkConf "owl2";
eudy = mkConf "eudy";
@@ -26,9 +25,6 @@ in
bay = mkConf "bay";
lake2 = mkConf "lake2";
raccoon = mkConf "raccoon";
fox = mkConf "fox";
apex = mkConf "apex";
weasel = mkConf "weasel";
};
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {

View File

@@ -9,29 +9,21 @@ rec {
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon";
};
hostGroup = with hosts; rec {
compute = [ owl1 owl2 fox raccoon ];
playground = [ eudy koro weasel ];
compute = [ owl1 owl2 ];
playground = [ eudy koro ];
storage = [ bay lake2 ];
monitor = [ hut ];
login = [ apex ];
system = storage ++ monitor ++ login;
system = storage ++ monitor;
safe = system ++ compute;
all = safe ++ playground;
};
admins = {
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
};
}

View File

@@ -1,69 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/xeon.nix
../common/ssf/hosts.nix
../module/ceph.nix
../module/hut-substituter.nix
../module/slurm-server.nix
./nfs.nix
./wireguard.nix
];
# Don't install grub MBR for now
boot.loader.grub.device = "nodev";
boot.initrd.kernelModules = [
"megaraid_sas" # For HW RAID
];
environment.systemPackages = with pkgs; [
storcli # To manage HW RAID
];
fileSystems."/home" = {
device = "/dev/disk/by-label/home";
fsType = "ext4";
};
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
networking = {
hostName = "apex";
defaultGateway = "84.88.53.233";
nameservers = [ "8.8.8.8" ];
# Public facing interface
interfaces.eno1.ipv4.addresses = [ {
address = "84.88.53.236";
prefixLength = 29;
} ];
# Internal LAN to our Ethernet switch
interfaces.eno2.ipv4.addresses = [ {
address = "10.0.40.30";
prefixLength = 24;
} ];
# Infiniband over Omnipath switch (disconnected for now)
# interfaces.ibp5s0 = {};
nat = {
enable = true;
internalInterfaces = [ "eno2" ];
externalInterface = "eno1";
};
};
networking.firewall = {
extraCommands = ''
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
# logs. Insert as first position so we also protect SSH.
iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
# Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
'';
};
}

View File

@@ -1,48 +0,0 @@
{ ... }:
{
services.nfs.server = {
enable = true;
lockdPort = 4001;
mountdPort = 4002;
statdPort = 4000;
exports = ''
/home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
/home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
'';
};
networking.firewall = {
# Check with `rpcinfo -p`
extraCommands = ''
# Accept NFS traffic from compute nodes but not from the outside
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
# Accept NFS traffic from wg0
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
# Same but UDP
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
'';
};
}

View File

@@ -1,42 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
ips = [ "10.106.0.30/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgApex.path;
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
peers = [
{
name = "fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.1/32" ];
endpoint = "fox.ac.upc.edu:666";
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
persistentKeepalive = 25;
}
{
name = "raccoon";
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox" ];
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
}

View File

@@ -2,8 +2,7 @@
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
../common/xeon.nix
../module/monitoring.nix
];

View File

@@ -3,7 +3,6 @@
# Includes the basic configuration for an Intel server.
imports = [
./base/agenix.nix
./base/always-power-on.nix
./base/august-shutdown.nix
./base/boot.nix
./base/env.nix

View File

@@ -1,8 +0,0 @@
{
imports = [
../../module/power-policy.nix
];
# Turn on as soon as we have power
power.policy = "always-on";
}

View File

@@ -1,12 +1,12 @@
{
# Shutdown all machines on August 3rd at 22:00, so we can protect the
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
# hardware from spurious electrical peaks on the yearly electrical cut for
# manteinance that starts on August 4th.
systemd.timers.august-shutdown = {
description = "Shutdown on August 3rd for maintenance";
description = "Shutdown on August 2nd for maintenance";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = "*-08-03 22:00:00";
OnCalendar = "*-08-02 11:00:00";
RandomizedDelaySec = "10min";
Unit = "systemd-poweroff.service";
};

View File

@@ -3,8 +3,8 @@
{
environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns
# From bsckgs overlay
osumb
];
@@ -21,8 +21,6 @@
}
];
environment.enableAllTerminfo = true;
environment.variables = {
EDITOR = "vim";
VISUAL = "vim";

View File

@@ -1,4 +1,4 @@
{ pkgs, lib, ... }:
{ pkgs, ... }:
{
networking = {
@@ -10,14 +10,10 @@
allowedTCPPorts = [ 22 ];
};
# Make sure we use iptables
nftables.enable = lib.mkForce false;
hosts = {
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
"84.88.51.152" = [ "raccoon" ];
"84.88.51.142" = [ "raccoon-ipmi" ];
"192.168.11.12" = [ "bscpm04.bsc.es" ];
"192.168.11.15" = [ "gitlab-internal.bsc.es" ];
};
};
}

View File

@@ -6,8 +6,6 @@
(import ../../../pkgs/overlay.nix)
];
nixpkgs.config.allowUnfree = true;
nix = {
nixPath = [
"nixpkgs=${nixpkgs}"

View File

@@ -11,8 +11,5 @@ in
programs.ssh.knownHosts = hostsKeys // {
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
};
}

View File

@@ -56,7 +56,7 @@
home = "/home/Computational/rpenacob";
description = "Raúl Peñacoba";
group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ];
hosts = [ "owl1" "owl2" "hut" ];
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
@@ -69,10 +69,10 @@
home = "/home/Computational/anavarro";
description = "Antoni Navarro";
group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ];
hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1";
hosts = [ "hut" "raccoon" ];
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
];
};
@@ -82,7 +82,7 @@
home = "/home/Computational/abonerib";
description = "Aleix Boné";
group = "Computational";
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ];
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
@@ -95,7 +95,7 @@
home = "/home/Computational/vlopez";
description = "Victor López";
group = "Computational";
hosts = [ "apex" "koro" ];
hosts = [ "koro" ];
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
@@ -108,78 +108,12 @@
home = "/home/Computational/dbautist";
description = "Dylan Bautista Cases";
group = "Computational";
hosts = [ "apex" "hut" "tent" "raccoon" ];
hosts = [ "hut" "raccoon" ];
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
];
};
dalvare1 = {
uid = 2758;
isNormalUser = true;
home = "/home/Computational/dalvare1";
description = "David Álvarez";
group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ];
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
];
};
varcila = {
uid = 5650;
isNormalUser = true;
home = "/home/Computational/varcila";
description = "Vincent Arcila";
group = "Computational";
hosts = [ "apex" "hut" "tent" "fox" ];
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
};
pmartin1 = {
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
uid = 9652;
isNormalUser = true;
home = "/home/Computational/pmartin1";
description = "Pedro J. Martinez-Ferrer";
group = "Computational";
hosts = [ "fox" ];
hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl.";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
];
};
csiringo = {
uid = 9653;
isNormalUser = true;
home = "/home/Computational/csiringo";
description = "Cesare Siringo";
group = "Computational";
hosts = [ ];
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
];
};
acinca = {
uid = 9654;
isNormalUser = true;
home = "/home/Computational/acinca";
description = "Arnau Cinca";
group = "Computational";
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
];
};
};
groups = {

View File

@@ -1,10 +0,0 @@
{
# Provides the base system for a xeon node in the SSF rack.
imports = [
./xeon.nix
./ssf/fs.nix
./ssf/hosts.nix
./ssf/hosts-remote.nix
./ssf/net.nix
];
}

View File

@@ -1,9 +0,0 @@
{ pkgs, ... }:
{
networking.hosts = {
# Remote hosts visible from compute nodes
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
networking.hosts = {
# Login
"10.0.40.30" = [ "apex" ];
# Storage
"10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ];
"10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
"10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ];
# Xeon compute
"10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ];
"10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ];
"10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
#"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ];
"10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ];
"10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
"10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ];
"10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ];
};
}

View File

@@ -1,23 +0,0 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
};
}

View File

@@ -1,7 +1,10 @@
{
# Provides the base system for a xeon node, not necessarily in the SSF rack.
# Provides the base system for a xeon node.
imports = [
./base.nix
./xeon/console.nix
./xeon/fs.nix
./xeon/net.nix
./xeon/ssh.nix
];
}

90
m/common/xeon/net.nix Normal file
View File

@@ -0,0 +1,90 @@
{ pkgs, ... }:
{
# Infiniband (IPoIB)
environment.systemPackages = [ pkgs.rdma-core ];
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = {
defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"];
proxy = {
default = "http://hut:23080/";
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
# https://github.com/golang/go/issues/16715
allProxy = null;
};
firewall = {
extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
# But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
'';
};
extraHosts = ''
10.0.40.30 ssfhead
# Node Entry for node: mds01 (ID=72)
10.0.40.40 bay mds01 mds01-eth0
10.0.42.40 bay-ib mds01-ib0
10.0.40.141 bay-ipmi mds01-ipmi0
# Node Entry for node: oss01 (ID=73)
10.0.40.41 oss01 oss01-eth0
10.0.42.41 oss01-ib0
10.0.40.142 oss01-ipmi0
# Node Entry for node: oss02 (ID=74)
10.0.40.42 lake2 oss02 oss02-eth0
10.0.42.42 lake2-ib oss02-ib0
10.0.40.143 lake2-ipmi oss02-ipmi0
# Node Entry for node: xeon01 (ID=15)
10.0.40.1 owl1 xeon01 xeon01-eth0
10.0.42.1 owl1-ib xeon01-ib0
10.0.40.101 owl1-ipmi xeon01-ipmi0
# Node Entry for node: xeon02 (ID=16)
10.0.40.2 owl2 xeon02 xeon02-eth0
10.0.42.2 owl2-ib xeon02-ib0
10.0.40.102 owl2-ipmi xeon02-ipmi0
# Node Entry for node: xeon03 (ID=17)
10.0.40.3 xeon03 xeon03-eth0
10.0.42.3 xeon03-ib0
10.0.40.103 xeon03-ipmi0
# Node Entry for node: xeon04 (ID=18)
10.0.40.4 xeon04 xeon04-eth0
10.0.42.4 xeon04-ib0
10.0.40.104 xeon04-ipmi0
# Node Entry for node: xeon05 (ID=19)
10.0.40.5 koro xeon05 xeon05-eth0
10.0.42.5 koro-ib xeon05-ib0
10.0.40.105 koro-ipmi xeon05-ipmi0
# Node Entry for node: xeon06 (ID=20)
10.0.40.6 xeon06 xeon06-eth0
10.0.42.6 xeon06-ib0
10.0.40.106 xeon06-ipmi0
# Node Entry for node: xeon07 (ID=21)
10.0.40.7 hut xeon07 xeon07-eth0
10.0.42.7 hut-ib xeon07-ib0
10.0.40.107 hut-ipmi xeon07-ipmi0
# Node Entry for node: xeon08 (ID=22)
10.0.40.8 eudy xeon08 xeon08-eth0
10.0.42.8 eudy-ib xeon08-ib0
10.0.40.108 eudy-ipmi xeon08-ipmi0
'';
};
}

8
m/common/xeon/ssh.nix Normal file
View File

@@ -0,0 +1,8 @@
{
# Connect to intranet git hosts via proxy
programs.ssh.extraConfig = ''
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
'';
}

View File

@@ -2,14 +2,13 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
./kernel/kernel.nix
./cpufreq.nix
./fs.nix
./users.nix
../module/hut-substituter.nix
../module/debuginfod.nix
];

View File

@@ -1,112 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/base.nix
../common/xeon/console.nix
../module/amd-uprof.nix
../module/emulation.nix
../module/nvidia.nix
../module/slurm-client.nix
../module/hut-substituter.nix
./wireguard.nix
];
# Don't turn off on August as UPC has different dates.
# Fox works fine on power cuts.
systemd.timers.august-shutdown.enable = false;
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ];
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
# Use performance for benchmarks
powerManagement.cpuFreqGovernor = "performance";
services.amd-uprof.enable = true;
# Disable NUMA balancing
boot.kernel.sysctl."kernel.numa_balancing" = 0;
# Expose kernel addresses
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
# Disable NMI watchdog to save one hw counter (for AMD uProf)
boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
services.openssh.settings.X11Forwarding = true;
services.fail2ban.enable = true;
networking = {
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
hostName = "fox";
# UPC network (may change over time, use DHCP)
# Public IP configuration:
# - Hostname: fox.ac.upc.edu
# - IP: 147.83.30.141
# - Gateway: 147.83.30.130
# - NetMask: 255.255.255.192
# Private IP configuration for BMC:
# - Hostname: fox-ipmi.ac.upc.edu
# - IP: 147.83.35.27
# - Gateway: 147.83.35.2
# - NetMask: 255.255.255.0
interfaces.enp1s0f0np0.useDHCP = true;
};
# Recommended for new graphics cards
hardware.nvidia.open = true;
# Mount NVME disks
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
# Mount the NFS home
fileSystems."/nfs/home" = {
device = "10.106.0.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Make a /nvme{0,1}/$USER directory for each user.
systemd.services.create-nvme-dirs = let
# Take only normal users in fox
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
]) users);
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# Only allow SSH connections from users who have a SLURM allocation
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
security.pam.services.sshd.rules.account.slurm = {
control = "required";
enable = true;
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
args = [ "log_level=debug5" ];
order = 999999; # Make it last one
};
# Disable systemd session (pam_systemd.so) as it will conflict with the
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
# into the slurmstepd task and then into the systemd session, which is not
# what we want, otherwise it will linger even if all jobs are gone.
security.pam.services.sshd.startSession = lib.mkForce false;
}

View File

@@ -1,53 +0,0 @@
{ config, ... }:
{
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgFox.file = ../../secrets/wg-fox.age;
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
# "wg0" is the network interface name. You can name the interface arbitrarily.
wg0 = {
# Determines the IP address and subnet of the server's end of the tunnel interface.
ips = [ "10.106.0.1/24" ];
# The port that WireGuard listens to. Must be accessible by the client.
listenPort = 666;
# Path to the private key file.
privateKeyFile = config.age.secrets.wgFox.path;
# Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
peers = [
# List of allowed peers.
{
name = "apex";
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
allowedIPs = [ "10.106.0.30/32" ];
}
{
name = "raccoon";
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
}
];
};
};
networking.hosts = {
"10.106.0.30" = [ "apex" ];
"10.106.0.236" = [ "raccoon" ];
"10.0.44.4" = [ "tent" ];
};
networking.firewall = {
extraCommands = ''
# Accept slurm connections to slurmd from apex (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
'';
};
}

View File

@@ -3,12 +3,160 @@ modules:
prober: http
timeout: 5s
http:
proxy_url: "http://127.0.0.1:23080"
skip_resolve_phase_with_proxy: true
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes: [] # Defaults to 2xx
method: GET
http_with_proxy:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
skip_resolve_phase_with_proxy: true
http_with_proxy_and_headers:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
proxy_connect_header:
Proxy-Authorization:
- Bearer token
http_post_2xx:
prober: http
timeout: 5s
http:
method: POST
headers:
Content-Type: application/json
body: '{}'
http_post_body_file:
prober: http
timeout: 5s
http:
method: POST
body_file: "/files/body.txt"
http_basic_auth_example:
prober: http
timeout: 5s
http:
method: POST
headers:
Host: "login.example.com"
basic_auth:
username: "username"
password: "mysecret"
http_2xx_oauth_client_credentials:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes:
- 200
- 201
oauth2:
client_id: "client_id"
client_secret: "client_secret"
token_url: "https://api.example.com/token"
endpoint_params:
grant_type: "client_credentials"
http_custom_ca_example:
prober: http
http:
method: GET
tls_config:
ca_file: "/certs/my_cert.crt"
http_gzip:
prober: http
http:
method: GET
compression: gzip
http_gzip_with_accept_encoding:
prober: http
http:
method: GET
compression: gzip
headers:
Accept-Encoding: gzip
tls_connect:
prober: tcp
timeout: 5s
tcp:
tls: true
tcp_connect_example:
prober: tcp
timeout: 5s
imap_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "OK.*STARTTLS"
- send: ". STARTTLS"
- expect: "OK"
- starttls: true
- send: ". capability"
- expect: "CAPABILITY IMAP4rev1"
smtp_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "^220 ([^ ]+) ESMTP (.+)$"
- send: "EHLO prober\r"
- expect: "^250-STARTTLS"
- send: "STARTTLS\r"
- expect: "^220"
- starttls: true
- send: "EHLO prober\r"
- expect: "^250-AUTH"
- send: "QUIT\r"
irc_banner_example:
prober: tcp
timeout: 5s
tcp:
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"
dns_udp_example:
prober: dns
timeout: 5s
dns:
query_name: "www.prometheus.io"
query_type: "A"
valid_rcodes:
- NOERROR
validate_answer_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
fail_if_all_match_regexp:
- ".*127.0.0.1"
fail_if_not_matches_regexp:
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
fail_if_none_matches_regexp:
- "127.0.0.1"
validate_authority_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
validate_additional_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
dns_soa:
prober: dns
dns:
query_name: "prometheus.io"
query_type: "SOA"
dns_tcp_example:
prober: dns
dns:
transport_protocol: "tcp" # defaults to "udp"
preferred_ip_protocol: "ip4" # defaults to "ip6"
query_name: "www.prometheus.io"

View File

@@ -1,42 +1,27 @@
{ config, pkgs, lib, ... }:
{ config, pkgs, ... }:
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/debuginfod.nix
../module/emulation.nix
../module/slurm-client.nix
./gitlab-runner.nix
./monitoring.nix
./nfs.nix
./slurm-server.nix
./nix-serve.nix
./public-inbox.nix
./gitea.nix
./msmtp.nix
./postgresql.nix
./nginx.nix
./p.nix
#./pxe.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f";
fileSystems = {
"/" = lib.mkForce {
device = "/dev/disk/by-label/nvme";
fsType = "ext4";
neededForBoot = true;
options = [ "noatime" ];
};
"/boot" = lib.mkForce {
device = "/dev/disk/by-label/nixos-boot";
fsType = "ext4";
neededForBoot = true;
};
};
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
networking = {
hostName = "hut";
@@ -54,11 +39,6 @@
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
'';
# Flush all rules and chains on stop so it won't break on start
extraStopCommands = ''
iptables -F
iptables -X
'';
};
};

View File

@@ -1,9 +1,8 @@
{ pkgs, lib, config, ... }:
{
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age;
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age;
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
services.gitlab-runner = {
enable = true;
@@ -22,89 +21,20 @@
"--docker-network-mode host"
];
environmentVariables = {
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
https_proxy = "http://localhost:23080";
http_proxy = "http://localhost:23080";
};
};
in {
# For pm.bsc.es/gitlab
gitlab-pm-shell = common-shell // {
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path;
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
};
gitlab-pm-docker = common-docker // {
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
};
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
tagList = [ "docker" "hut" ];
environmentVariables = {
# We cannot access the hut local interface from docker, so we connect
# to hut directly via the ethernet one.
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
};
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerExtraHosts = [
# Required to pass the proxy via hut
"hut:10.0.40.7"
];
dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat > "$HOME/.ssh/config" << EOF
Host bscpm04.bsc.es gitlab-internal.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p
EOF
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = {
ENV = "/etc/profile";
USER = "root";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
};
};
};
};
# DOCKER* chains are useless, override at FORWARD and nixos-fw
networking.firewall.extraCommands = ''
# Don't forward any traffic from docker
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
# Allow incoming traffic from docker to 23080
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
'';
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;

View File

@@ -1,31 +0,0 @@
{ pkgs, config, lib, ... }:
let
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
''
cp ${./gpfs-probe.sh} $out;
chmod +x $out
''
;
in
{
# Use a new user to handle the SSH keys
users.groups.ssh-robot = { };
users.users.ssh-robot = {
description = "SSH Robot";
isNormalUser = true;
home = "/var/lib/ssh-robot";
};
systemd.services.gpfs-probe = {
description = "Daemon to report GPFS latency via SSH";
path = [ pkgs.openssh pkgs.netcat ];
after = [ "network.target" ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
User = "ssh-robot";
Group = "ssh-robot";
};
};
}

View File

@@ -1,18 +0,0 @@
#!/bin/sh
N=500
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
if [ -z "$t" ]; then
t="5.00"
fi
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP gpfs_touch_latency Time to create $N files.
# TYPE gpfs_touch_latency gauge
gpfs_touch_latency $t
EOF

13
m/hut/ipmi.yml Normal file
View File

@@ -0,0 +1,13 @@
modules:
default:
collectors:
- bmc
- ipmi
- chassis
lan:
collectors:
- ipmi
- chassis
user: ""
pass: ""

View File

@@ -1,13 +1,7 @@
{ config, lib, ... }:
{
imports = [
../module/slurm-exporter.nix
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
./gpfs-probe.nix
../module/nix-daemon-exporter.nix
];
imports = [ ../module/slurm-exporter.nix ];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
@@ -15,8 +9,6 @@
mode = "400";
};
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
services.grafana = {
enable = true;
settings = {
@@ -49,7 +41,7 @@
services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
retentionTime = "1y";
listenAddress = "127.0.0.1";
};
@@ -78,13 +70,13 @@
enable = true;
group = "root";
user = "root";
configFile = config.age.secrets.ipmiYml.path;
# extraFlags = [ "--log.level=debug" ];
configFile = ./ipmi.yml;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "systemd" "logind" ];
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
@@ -110,10 +102,6 @@
"127.0.0.1:9252"
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
"127.0.0.1:9341" # Slurm exporter
"127.0.0.1:9966" # GPFS custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
];
}];
@@ -169,9 +157,6 @@
"8.8.8.8"
"ssfhead"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"arenys5.ac.upc.edu"
];
}];
relabel_configs = [
@@ -217,7 +202,7 @@
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
separator = ";";
regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end
regex = "(.*)";
target_label = "instance";
replacement = "\${1}";
action = "replace";
@@ -259,14 +244,6 @@
module = [ "raccoon" ];
};
}
{
job_name = "raccoon";
static_configs = [
{
targets = [ "127.0.0.1:19002" ]; # Node exporter
}
];
}
];
};
}

View File

@@ -1,76 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /ceph/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /ceph/p/;
}
'';
};
};
}

View File

@@ -1,43 +0,0 @@
{ pkgs, lib, config, ... }:
let
p = pkgs.writeShellScriptBin "p" ''
set -e
cd /ceph
pastedir="p/$USER"
mkdir -p "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "https://jungle.bsc.es/$out"
'';
in
{
environment.systemPackages = with pkgs; [ p ];
# Make sure we have a directory per user. We cannot use the nice
# systemd-tmpfiles-setup.service service because this is a remote FS, and it
# may not be mounted when it runs.
systemd.services.create-paste-dirs = let
# Take only normal users in hut
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
]) users);
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "remote-fs.target" ];
after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
}

7
m/hut/slurm-server.nix Normal file
View File

@@ -0,0 +1,7 @@
{ ... }:
{
services.slurm = {
server.enable = true;
};
}

View File

@@ -1,15 +1,15 @@
- targets:
- owl1-ipmi
- owl2-ipmi
- xeon03-ipmi
- xeon04-ipmi
- koro-ipmi
- weasel-ipmi
- hut-ipmi
- eudy-ipmi
- 10.0.40.101
- 10.0.40.102
- 10.0.40.103
- 10.0.40.104
- 10.0.40.105
- 10.0.40.106
- 10.0.40.107
- 10.0.40.108
# Storage
- bay-ipmi
- oss01-ipmi
- lake2-ipmi
- 10.0.40.141
- 10.0.40.142
- 10.0.40.143
labels:
job: ipmi-lan

View File

@@ -2,7 +2,7 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
../eudy/cpufreq.nix

View File

@@ -2,9 +2,8 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/monitoring.nix
../module/hut-substituter.nix
];
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";

View File

@@ -1,70 +0,0 @@
{
# In physical order from top to bottom (see note below)
ssf = {
# Switches for Ethernet and OmniPath
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
switch-opa = { pos=41; size=1; };
# SSF login
apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
# Storage
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
# Compute xeon
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
# Slot 34 empty
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
# 16 KNL nodes, 4 per chassis
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
# Slot 19 empty
# EPI (hw team, guessed order)
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
# These are old and decommissioned (off)
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
};
bsc2218 = {
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
};
upc = {
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
};
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
# bottom. Example:
#
# | ... | - [pos+size] <--- Label in chassis
# +--------+
# | node | - [pos+1]
# | 2U | - [pos]
# +------- +
# | ... | - [pos-1]
#
# NOTE: The board and sn refers to the FRU information (Board Product and
# Board Serial) via `ipmitool fru print 0`.
}

View File

@@ -1,49 +0,0 @@
{ config, lib, pkgs, ... }:
{
options = {
services.amd-uprof = {
enable = lib.mkOption {
type = lib.types.bool;
default = false;
description = "Whether to enable AMD uProf.";
};
};
};
# Only setup amd-uprof if enabled
config = lib.mkIf config.services.amd-uprof.enable {
# First make sure that we add the module to the list of available modules
# in the kernel matching the same kernel version of this configuration.
boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ];
boot.kernelModules = [ "AMDPowerProfiler" ];
# Make the userspace tools available in $PATH.
environment.systemPackages = with pkgs; [ amd-uprof ];
# The AMDPowerProfiler module doesn't create the /dev device nor it emits
# any uevents, so we cannot use udev rules to automatically create the
# device. Instead, we run a systemd unit that does it after loading the
# modules.
systemd.services.amd-uprof-device = {
description = "Create /dev/AMDPowerProfiler device";
after = [ "systemd-modules-load.service" ];
wantedBy = [ "multi-user.target" ];
unitConfig.ConditionPathExists = [
"/proc/AMDPowerProfiler/device"
"!/dev/AMDPowerProfiler"
];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
'';
ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
rm -f /dev/AMDPowerProfiler
'';
};
};
};
}

View File

@@ -1,13 +0,0 @@
{ config, ... }:
{
nix.settings =
# Don't add hut as a cache to itself
assert config.networking.hostName != "hut";
{
extra-substituters = [ "http://hut/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
# Set a low timeout in case hut is down
connect-timeout = 3; # seconds
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-meteocat-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
};
};
}

View File

@@ -1,26 +0,0 @@
#!/bin/sh
# Locate nix daemon pid
nd=$(pgrep -o nix-daemon)
# Locate children of nix-daemon
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
# For each children, locate 2nd level children
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP nix_daemon_build Nix daemon derivation build state.
# TYPE nix_daemon_build gauge
EOF
for pid in $pids2; do
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
user=$(ps -o uname= -p "$pid")
if [ -n "$name" -a -n "$user" ]; then
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
fi
done

View File

@@ -1,23 +0,0 @@
{ pkgs, config, lib, ... }:
let
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
''
cp ${./nix-daemon-builds.sh} $out;
chmod +x $out
''
;
in
{
systemd.services.nix-daemon-exporter = {
description = "Daemon to export nix-daemon metrics";
path = [ pkgs.procps pkgs.ripgrep ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
# Needed root to read the environment, potentially unsafe
User = "root";
Group = "root";
};
};
}

View File

@@ -1,20 +0,0 @@
{ lib, config, pkgs, ... }:
{
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
hardware.graphics.enable = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# enable support for derivations which require nvidia-gpu to be available
# > requiredSystemFeatures = [ "cuda" ];
programs.nix-required-mounts.enable = true;
programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
# They forgot to add the symlink
programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
];
environment.systemPackages = [ pkgs.cudainfo ];
}

View File

@@ -1,68 +0,0 @@
{ config, lib, pkgs, ... }:
let
cfg = config.services.p;
in
{
options = {
services.p = {
enable = lib.mkOption {
type = lib.types.bool;
default = false;
description = "Whether to enable the p service.";
};
path = lib.mkOption {
type = lib.types.str;
default = "/var/lib/p";
description = "Where to save the pasted files on disk.";
};
url = lib.mkOption {
type = lib.types.str;
default = "https://jungle.bsc.es/p";
description = "URL prefix for the printed file.";
};
};
};
config = lib.mkIf cfg.enable {
environment.systemPackages = let
p = pkgs.writeShellScriptBin "p" ''
set -e
pastedir="${cfg.path}/$USER"
cd "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "${cfg.url}/$USER/$out"
'';
in [ p ];
systemd.services.p = let
# Take only normal users
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
# Create a directory for each user
commands = lib.concatLists (lib.mapAttrsToList (_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}"
]) users);
in {
description = "P service setup";
requires = [ "network-online.target" ];
#wants = [ "remote-fs.target" ];
#after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = pkgs.writeShellScript "p-init.sh" (''
install -d -o root -g root -m 0755 ${cfg.path}
'' + (lib.concatLines commands));
};
};
};
}

View File

@@ -1,33 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.power.policy;
in
{
options = {
power.policy = mkOption {
type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
default = null;
description = "Set power policy to use via IPMI.";
};
};
config = mkIf (cfg != null) {
systemd.services."power-policy" = {
description = "Set power policy to use via IPMI";
wantedBy = [ "multi-user.target" ];
unitConfig = {
StartLimitBurst = "10";
StartLimitIntervalSec = "10m";
};
serviceConfig = {
ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
Type = "oneshot";
Restart = "on-failure";
RestartSec = "5s";
};
};
};
}

View File

@@ -1,10 +1,33 @@
{ lib, ... }:
{ config, pkgs, lib, ... }:
{
imports = [
./slurm-common.nix
];
let
suspendProgram = pkgs.writeScript "suspend.sh" ''
#!/usr/bin/env bash
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeScript "resume.sh" ''
#!/usr/bin/env bash
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
systemd.services.slurmd.serviceConfig = {
# Kill all processes in the control group on stop/restart. This will kill
# all the jobs running, so ensure that we only upgrade when the nodes are
@@ -12,13 +35,73 @@
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
KillMode = lib.mkForce "control-group";
# If slurmd fails to contact the control server it will fail, causing the
# node to remain out of service until manually restarted. Always try to
# restart it.
Restart = "always";
RestartSec = "30s";
};
services.slurm.client.enable = true;
services.slurm = {
client.enable = true;
controlMachine = "hut";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=hut
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
'';
};
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
}

View File

@@ -1,115 +0,0 @@
{ config, pkgs, ... }:
let
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Shutting down host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
done
'';
resumeProgram = pkgs.writeShellScript "resume.sh" ''
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
set -x
export "PATH=/run/current-system/sw/bin:$PATH"
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
hosts=$(scontrol show hostnames $1)
for host in $hosts; do
echo Starting host: $host
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
done
'';
in {
services.slurm = {
controlMachine = "apex";
clusterName = "jungle";
nodeName = [
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
extraConfig = ''
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
# not with Intel MPI. For that use the compatibility shim libpmi.so
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
# library in SLURM (--mpi=pmix). See more details here:
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
MpiDefault=pmix
# When a node reboots return that node to the slurm queue as soon as it
# becomes operative again.
ReturnToService=2
# Track all processes by using a cgroup
ProctrackType=proctrack/cgroup
# Enable task/affinity to allow the jobs to run in a specified subset of
# the resources. Use the task/cgroup plugin to enable process containment.
TaskPlugin=task/affinity,task/cgroup
# Power off unused nodes until they are requested
SuspendProgram=${suspendProgram}
SuspendTimeout=60
ResumeProgram=${resumeProgram}
ResumeTimeout=300
SuspendExcNodes=fox
# Turn the nodes off after 1 hour of inactivity
SuspendTime=3600
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
'';
};
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = {
file = ../../secrets/munge-key.age;
owner = "munge";
group = "munge";
};
services.munge = {
enable = true;
password = config.age.secrets.mungeKey.path;
};
}

View File

@@ -1,23 +0,0 @@
{ ... }:
{
imports = [
./slurm-common.nix
];
services.slurm.server.enable = true;
networking.firewall = {
extraCommands = ''
# Accept slurm connections to controller from compute nodes
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from compute nodes for srun
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
# Accept slurm connections to controller from fox (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
# Accept slurm connections from fox for srun (via wireguard)
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
'';
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-upc-qaire-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
};
};
}

View File

@@ -1,35 +0,0 @@
{config, ...}:
{
age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age;
age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age;
services.openvpn.servers = {
# systemctl status openvpn-dac.service
dac = {
config = ''
client
dev tun
proto tcp
remote vpn.ac.upc.edu 1194
remote vpn.ac.upc.edu 80
resolv-retry infinite
nobind
persist-key
persist-tun
ca ${./vpn-dac/ca.crt}
cert ${./vpn-dac/client.crt}
# Only key needs to be secret
key ${config.age.secrets.vpn-dac-client-key.path}
remote-cert-tls server
comp-lzo
verb 3
auth-user-pass ${config.age.secrets.vpn-dac-login.path}
reneg-sec 0
# Only route fox-ipmi
pull-filter ignore "route "
route 147.83.35.27 255.255.255.255
'';
};
};
}

View File

@@ -1,31 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD
VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx
LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk
MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM
Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw
Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE
BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w
KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi
BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB
QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu
ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC
F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D
6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF
LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9
V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx
L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH
LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q
MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL
MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs
b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu
eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE
AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh
Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF
AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1
6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/
oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q
+oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3
4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z
TQQx5NqM7B5jjak5fauR3/oRD8XXsA==
-----END CERTIFICATE-----

View File

@@ -1,100 +0,0 @@
Certificate:
Data:
Version: 3 (0x2)
Serial Number: 2 (0x2)
Signature Algorithm: sha256WithRSAEncryption
Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
Validity
Not Before: Jan 12 12:45:41 2016 GMT
Not After : Jan 12 12:45:41 2046 GMT
Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu
Subject Public Key Info:
Public Key Algorithm: rsaEncryption
Public-Key: (2048 bit)
Modulus:
00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64:
c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6:
56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7:
19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58:
48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83:
dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9:
2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa:
6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5:
b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f:
60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec:
76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa:
dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d:
5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb:
6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e:
fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00:
17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93:
71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55:
f9:41
Exponent: 65537 (0x10001)
X509v3 extensions:
X509v3 Basic Constraints:
CA:FALSE
Netscape Comment:
Easy-RSA Generated Certificate
X509v3 Subject Key Identifier:
1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18
X509v3 Authority Key Identifier:
keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA
DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
serial:91:F5:D7:C3:C0:A6:4E:61
X509v3 Extended Key Usage:
TLS Web Client Authentication
X509v3 Key Usage:
Digital Signature
X509v3 Subject Alternative Name:
DNS:client
Signature Algorithm: sha256WithRSAEncryption
42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea:
3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7:
26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0:
31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1:
01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a:
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4:
02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3:
40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3:
4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9:
1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33:
75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72:
1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33:
87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62:
a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4:
bf:23:f6:7d
-----BEGIN CERTIFICATE-----
MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx
EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK
EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT
G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN
MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X
DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw
EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk
VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB
cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG
A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S
O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL
Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc
n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc
IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl
60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB
o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu
ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD
GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx
CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl
bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1
bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV
BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA
YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud
DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo
ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx
3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk
g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m
lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck
iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU
CzHGPDS3/FoM5L8j9n0=
-----END CERTIFICATE-----

View File

@@ -2,13 +2,12 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/debuginfod.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches

View File

@@ -2,13 +2,12 @@
{
imports = [
../common/ssf.nix
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/debuginfod.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches

View File

@@ -3,13 +3,10 @@
{
imports = [
../common/base.nix
../common/ssf/hosts.nix
../module/emulation.nix
../module/debuginfod.nix
../module/nvidia.nix
../eudy/kernel/lttng.nix
../eudy/kernel/perf.nix
./wireguard.nix
../module/hut-substituter.nix
];
# Don't install Grub on the disk yet
@@ -30,35 +27,43 @@
address = "84.88.51.152";
prefixLength = 25;
} ];
interfaces.enp5s0f1.ipv4.addresses = [ {
address = "10.0.44.1";
prefixLength = 24;
} ];
nat = {
enable = true;
internalInterfaces = [ "enp5s0f1" ];
externalInterface = "eno0";
};
hosts = {
"10.0.44.4" = [ "tent" ];
"84.88.53.236" = [ "apex" ];
};
};
# Mount the NFS home
fileSystems."/nfs/home" = {
device = "10.106.0.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Enable performance governor
powerManagement.cpuFreqGovernor = "performance";
hardware.nvidia.open = false; # Maxwell is older than Turing architecture
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.graphics.enable = true;
nixpkgs.config.allowUnfree = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# Disable garbage collection for now
nix.gc.automatic = lib.mkForce false;
# Use nix cache from hut
nix.settings = {
substituters = [ "https://jungle.bsc.es/cache" ];
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
services.openssh.settings.X11Forwarding = true;
nixpkgs.overlays = [
(final: prev: {
xilinx-xrt = prev.callPackage ./xilinx-xrt.nix { };
xilinx-fw = prev.callPackage ./xilinx-fw.nix { };
xilinx-xocl = prev.callPackage ./xilinx-xocl.nix {
kernel = config.boot.kernelPackages.kernel;
};
})
];
boot.extraModulePackages = [ pkgs.xilinx-xocl ];
boot.kernelModules = [ "xclmgmt" "xocl" ];
services.udev.packages = [ pkgs.xilinx-xocl ];
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];

View File

@@ -1,48 +0,0 @@
{ config, pkgs, ... }:
{
networking.nat = {
enable = true;
enableIPv6 = false;
externalInterface = "eno0";
internalInterfaces = [ "wg0" ];
};
networking.firewall = {
allowedUDPPorts = [ 666 ];
};
age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age;
# Enable WireGuard
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
wg0 = {
ips = [ "10.106.0.236/24" ];
listenPort = 666;
privateKeyFile = config.age.secrets.wgRaccoon.path;
# Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=
peers = [
{
name = "fox";
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
allowedIPs = [ "10.106.0.1/32" ];
endpoint = "fox.ac.upc.edu:666";
persistentKeepalive = 25;
}
{
name = "apex";
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ];
endpoint = "ssfhead.bsc.es:666";
persistentKeepalive = 25;
}
];
};
};
networking.hosts = {
"10.106.0.1" = [ "fox.wg" ];
"10.106.0.30" = [ "apex.wg" ];
};
}

335
m/raccoon/xilinx-create-xsabin.sh Executable file
View File

@@ -0,0 +1,335 @@
#!/bin/bash
## (c) Copyright 2020 Xilinx, Inc. All rights reserved.
##
## This file contains confidential and proprietary information
## of Xilinx, Inc. and is protected under U.S. and
## international copyright and other intellectual property
## laws.
##
## DISCLAIMER
## This disclaimer is not a license and does not grant any
## rights to the materials distributed herewith. Except as
## otherwise provided in a valid license issued to you by
## Xilinx, and to the maximum extent permitted by applicable
## law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
## WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
## AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
## BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
## INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
## (2) Xilinx shall not be liable (whether in contract or tort,
## including negligence, or under any other theory of
## liability) for any loss or damage of any kind or nature
## related to, arising under or in connection with these
## materials, including for any direct, or any indirect,
## special, incidental, or consequential loss or damage
## (including loss of data, profits, goodwill, or any type of
## loss or damage suffered as a result of any action brought
## by a third party) even if such damage or loss was
## reasonably foreseeable or Xilinx had been advised of the
## possibility of the same.
##
## CRITICAL APPLICATIONS
## Xilinx products are not designed or intended to be fail-
## safe, or for use in any application requiring fail-safe
## performance, such as life-support or safety devices or
## systems, Class III medical devices, nuclear facilities,
## applications related to the deployment of airbags, or any
## other applications that could lead to death, personal
## injury, or severe property or environmental damage
## (individually and collectively, "Critical
## Applications"). Customer assumes the sole risk and
## liability of any use of Xilinx products in Critical
## Applications, subject only to applicable laws and
## regulations governing limitations on product liability.
##
## THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
## PART OF THIS FILE AT ALL TIMES.
# This script must be run with root permissions
# if [[ "$EUID" -ne 0 ]]; then
# echo "This script must be run as root."
# exit
# fi
# Get absolute path to this script with any symlinks resolved
realme=$(realpath $0)
scriptpath="${realme%/*}"
echo "This is create_xsabin.sh running from $scriptpath on $(date)"
# The directory above that is the human-readable installation path - probably /opt/xilinx/firmware/<card>/<family>/<partition>/
humanpath=${scriptpath%/*}
pushd $humanpath > /dev/null
# This script may be called during firmware upgrade, in which case the firmware product, branch, version and release
# are provided as script arguments, to help this script to select the new firmware file
if [[ "$#" -ge 4 ]]; then
firmware_upgrade_product=$1
firmware_upgrade_branch=$2
firmware_upgrade_version=$3
firmware_upgrade_release=$4
echo "Run for install of firmware $firmware_upgrade_product-$firmware_upgrade_branch version $firmware_upgrade_version release $firmware_upgrade_release"
elif [[ "$#" -eq 3 ]]; then
# If 3 script arguments, these are the partition name, version and release, so that this script can report them for debug
echo "Run for install of partition $1 version $2 release $3"
fi
# Find the partition_metadata.json link in the install directory
jsonlink="partition_metadata.json"
if [[ ! -e "$jsonlink" ]]; then
echo "Cannot find $jsonlink file in $humanpath - install failed"
exit 1
fi
# Find the machine-readable directory for this partition:
# this is the target of the partition_metadata.json link
if [[ ! -h "$jsonlink" ]]; then
echo "$jsonlink in $humanpath should be a symlink, but it is not - install failed"
exit 1
fi
jsonpath=$(readlink $jsonlink)
if [[ $? -ne 0 ]]; then
echo "Failed to read target of symlink $jsonlink in $humanpath - install failed"
exit 1
fi
if [[ ! -e "$jsonpath" ]]; then
echo "Target of symlink $jsonlink in $humanpath is $jsonpath, which does not exist - install failed"
exit 1
fi
echo "Metadata file is $jsonpath"
machinepath=${jsonpath%/*}
json=${jsonpath##*/}
echo "User install path is $humanpath"
echo "Machine-readable path is $machinepath"
pushd $machinepath > /dev/null
# Parse the partition_metadata.json file to find the required firmware
declare -A firmware
firmware_products=()
product=""
branch="mainline"
major="*"
minor="*"
revision="*"
while IFS= read -r line; do
key=${line#*\"}
key=${key%%\"*}
value=${line%\"*}
value=${value##*\"}
numvalue=$value
if [[ "$numvalue" =~ ^0x ]]; then
numvalue=$(($numvalue))
fi
case "$key" in
"firmware")
# Starts a new section: record current one (except if before the first section)
if [[ -n "$product" ]]; then
firmware_products+=($product)
firmware["$product.branch"]=$branch
firmware["$product.version"]="$major.$minor.$revision"
product=""
branch="mainline"
major="*"
minor="*"
revision="*"
fi
;;
"firmware_product_name")
product=${value,,}
;;
"firmware_branch_name")
branch=${value,,}
;;
"firmware_version_major")
major=$numvalue
;;
"firmware_version_minor")
minor=$numvalue
;;
"firmware_version_revision")
revision=$numvalue
;;
esac
done <<< "$(grep '\"firmware' $json)"
# Record last section
firmware_products+=($product)
firmware["$product.branch"]=$branch
firmware["$product.version"]="$major.$minor.$revision"
# Locate the required firmware in existing installed directories, and build xclbinutil options to add firmware
# For each firmware, there is already a symlink in the human-readable directory's firmware directory
# which points to the existing firmware install directory
firmware_opts=""
for product in "${firmware_products[@]}"; do
uc_product=${product^^}
branch=${firmware[$product.branch]}
version=${firmware[$product.version]}
link="$humanpath/firmware/$product-$branch"
if [[ ! -L "$link" ]]; then
echo "Expected symlink $link for required $product firmware, but this either does not exist or is not a symlink - install failed"
exit 1
fi
firmware_path=$(readlink -f $humanpath/firmware/$product-$branch)
if [[ ! -e "$firmware_path" ]]; then
echo "Required $product firmware install directory not found at $firmware_path. Unable to build xsabin files"
exit 1
fi
# Locate the required firmware binary file
case "$product" in
"ert")
# ERT firmware is deployed within XRT and has its own file naming rule
if [[ "$branch" == "mainline" ]] || [[ "$branch" == "legacy" ]] || [[ "$branch" == "" ]]; then
ert_name="sched.bin"
else
ert_name="sched_$branch.bin"
fi
firmware_file="$firmware_path/$ert_name"
if [[ ! -e "$firmware_file" ]]; then
echo "Cannot locate required $product firmware: not found at $firmware_file. Unable to build xsabin files"
exit 1
fi
firmware_opts+=" --add-section SCHED_FIRMWARE:RAW:${firmware_file}"
;;
*)
# All other firmware is deployed in its own package
# Accommodate possible variations in firmware file name, as long as the file name contains the product name
# During firmware upgrade, it is possible that both the old and the new firmware files are both present
# (the old one may not be removed until after this script has run).
# In this situation, the new firmware product, branch and version are provided as script arguments:
# select the appropriate file here (if multiple files are found).
firmware_files=()
for globfile in $firmware_path/*; do
if [[ -e "$globfile" ]] && [[ ! -d "$globfile" ]]; then
globfilename=${globfile##*/}
if [[ "$globfilename" == *"$product"* ]] || [[ "$globfilename" == *"$uc_product"* ]]; then
firmware_files+=($globfile)
fi
fi
done
if [[ "${#firmware_files[@]}" -eq 0 ]]; then
echo "Cannot locate required $product firmware: not found at $firmware_path. Unable to build xsabin files"
exit 1
fi
firmware_file=""
if [[ "${#firmware_files[@]}" -gt 1 ]]; then
IFS=$'\n'
firmware_files=( $(sort -V <<<"${firmware_files[*]}") )
unset IFS
if [[ "$firmware_upgrade_product" == "$product" ]]; then
firmware_file=""
for fw_file in "${firmware_files[@]}"; do
fw_filename=${fw_file##*/}
if [[ "$fw_filename" == *"$firmware_upgrade_version"* ]]; then
firmware_file=$fw_file
fi
done
fi
fi
if [[ -z "$firmware_file" ]]; then
firmware_file="${firmware_files[-1]}"
fi
# Select the correct xsabin section name, depending on the firmware product
section=""
case "$product" in
"cmc")
section="FIRMWARE"
;;
"sc-fw" | "sc")
section="BMC-FW"
;;
*)
echo "Unrecognised firmware product name '$product', unable to select the correct xsabin section name"
exit 1
;;
esac
firmware_opts+=" --add-section ${section}:RAW:${firmware_file}"
# The SC firmware (BMC-FW section) may have a metadata JSON file also to be added
if [[ "$section" == "BMC-FW" && -e "$firmware_path/metadata.json" ]]; then
firmware_opts+=" --add-section BMC-METADATA:JSON:${firmware_path}/metadata.json"
fi
;;
esac
done
# Extract vendor, board, name and version from partition_metadata.json, to build PlatformVBNV
declare -A vbnv
for element in {partition_vendor,partition_card,partition_family,partition_name,installed_package_version}; do
line="$(grep $element $json)"
if [[ -n "$line" ]]; then
value=${line%\"*}
value=${value##*\"}
if [[ "$element" != "partition_vendor" ]] && [[ "$element" != "partition_card" ]]; then
value=${value//-/_}
fi
else
value="UNKNOWN"
fi
vbnv[$element]=$value
done
platform_vbnv="${vbnv[partition_vendor]}:${vbnv[partition_card]}:${vbnv[partition_family]}_${vbnv[partition_name]}:${vbnv[installed_package_version]}"
# Check for VBNV override in partition_metadata.json
line="$(grep vbnv_override $json)"
if [[ -n "$line" ]]; then
value=${line%\"*}
value=${value##*\"}
# Check VBNV override value is correctly formatted (4 fields separated by colons)
fields="$(echo "$value" | tr ':' ' ' | wc -w)"
if [[ "$fields" == "4" ]]; then
platform_vbnv=$value
fi
fi
# Use the XRT standard install path to find xclbinutil
xclbinutil="${xclbinutil:-/opt/xilinx/xrt/bin/xclbinutil}"
if [[ ! -e "$xclbinutil" ]]; then
echo "xclbinutil tool not found at $xclbinutil, unable to build xsabin files"
exit 1
fi
## Must source XRT's setup.sh to set up environment correctly
#if [[ ! -e /opt/xilinx/xrt/setup.sh ]]; then
# echo "XRT setup.sh not found at /opt/xilinx/xrt/setup.sh, XRT installation is bad. Cannot build xsabin files"
# exit 1
#fi
#source /opt/xilinx/xrt/setup.sh
# Build xclbinutil options for creating xsabin files
xclbinopts=" --force"
if [[ -e "partition.mcs" ]]; then
xclbinopts+=" --add-section MCS-PRIMARY:RAW:partition.mcs"
fi
if [[ -e "partition_secondary.mcs" ]]; then
xclbinopts+=" --add-section MCS-SECONDARY:RAW:partition_secondary.mcs"
fi
if [[ -e "partition.bin" ]]; then
xclbinopts+=" --add-section FLASH[BIN]-DATA:RAW:partition.bin"
fi
if [[ -e "bin_metadata.json" ]]; then
xclbinopts+=" --add-section FLASH[BIN]-METADATA:JSON:bin_metadata.json"
fi
if [[ -e "partition.bit" ]]; then
xclbinopts+=" --add-section BITSTREAM:RAW:partition.bit"
fi
if [[ -e "partition.pdi" ]]; then
xclbinopts+=" --add-section PDI:RAW:partition.pdi"
fi
xclbinopts+=" --add-section PARTITION_METADATA:JSON:${json}"
xclbinopts+=$firmware_opts
xclbinopts+=" --key-value SYS:PlatformVBNV:${platform_vbnv}"
# Create partition.xsabin
xsabin="partition.xsabin"
xclbincmd="${xclbinutil} ${xclbinopts} --output $xsabin"
echo $xclbincmd
$xclbincmd
if [[ $? -ne 0 ]]; then
echo "An error occurred while running xclbinutil"
exit 1
fi
if [[ ! -e "$xsabin" ]]; then
echo "xclbinutil did not create output file $xsabin"
exit 1
fi
# And we're done
echo "create_xsabin.sh completed successfully"

75
m/raccoon/xilinx-fw.nix Normal file
View File

@@ -0,0 +1,75 @@
{
stdenv
, lib
, dpkg
, fetchurl
, xilinx-xrt
}:
with lib;
# Must read: https://xilinx.github.io/XRT/master/html/platforms_partitions.html#shell
# Taken from:
# - https://aur.archlinux.org/packages/xilinx-sc-fw-u280
# - https://aur.archlinux.org/packages/xilinx-u280-gen3x16-xdma-base
stdenv.mkDerivation rec {
pname = "xilinx-fw";
version = "1.3.5-3592445";
srcs = [
# List packages with: curl https://packages.xilinx.com/artifactory/debian-packages-cache/pool/
(fetchurl {
url = "https://packages.xilinx.com/artifactory/debian-packages-cache/pool/xilinx-cmc-u280_1.3.5-3592445_all.deb";
hash = "sha256-H48bdeuBc9dK6LExMnw1RCfY85PKntBk/X8CMcAI+zI=";
})
(fetchurl {
url = "https://packages.xilinx.com/artifactory/debian-packages-cache/pool/xilinx-sc-fw-u280_4.3.28-1.ea1b92f_all.deb";
hash = "sha256-JxQal2IqYAgebAgfjs2noFG5ghxC9sJQFppJFUCx6jA=";
})
(fetchurl {
url = "https://packages.xilinx.com/artifactory/debian-packages-cache/pool/xilinx-u280-gen3x16-xdma-base_1-3585717_all.deb";
hash = "sha256-oe84YgmmRFZjNa63j0pIneuFUG0Bb4aA7wulyU4bCrY=";
})
(fetchurl {
url = "https://packages.xilinx.com/artifactory/debian-packages-cache/pool/xilinx-u280-gen3x16-xdma-validate_1-3585755_all.deb";
hash = "sha256-F+IAzR8NVc9FDsgQstpBcKeq3ogH1PI8nuq94sEExCg=";
})
# Needed for the ERT firmware
(fetchurl {
url = "https://packages.xilinx.com/artifactory/debian-packages-cache/pool/xrt_202320.2.16.204_22.04-amd64-xrt.deb";
hash = "sha256-FEhzx2KlIYpunXmTSBjtyAtblbuz5tkvnt2qp21gUho=";
})
];
dontStrip = true;
hardeningDisable = [ "all" ];
nativeBuildInputs = [ dpkg ];
unpackPhase = ''
for f in $srcs; do
dpkg-deb -x "$f" deb
done
sourceRoot=deb
'';
# Generate the xsabin firmware file by fixing the original script
buildPhase = ''
set -x
ln -rs lib/firmware/xilinx/283bab8f654d8674968f4da57f7fa5d7 lib/firmware/xilinx/fb2b2c5a19ed63593fea95f51fbc8eb9
ln -rs lib/firmware/xilinx/283bab8f654d8674968f4da57f7fa5d7/partition_metadata.json opt/xilinx/firmware/u280/gen3x16-xdma/base/partition_metadata.json
ln -rs lib/firmware/xilinx/283bab8f654d8674968f4da57f7fa5d7/partition.xsabin opt/xilinx/firmware/u280/gen3x16-xdma/base/partition.xsabin
ln -rs opt/xilinx/xrt/share/fw opt/xilinx/firmware/u280/gen3x16-xdma/base/firmware/ert-v30
ln -rs opt/xilinx/firmware/cmc/u280 opt/xilinx/firmware/u280/gen3x16-xdma/base/firmware/cmc-u280
ln -rs opt/xilinx/firmware/sc-fw/u280 opt/xilinx/firmware/u280/gen3x16-xdma/base/firmware/sc-fw-u280
find
export xclbinutil=${xilinx-xrt}/xrt/bin/xclbinutil
cp -a ${./xilinx-create-xsabin.sh} opt/xilinx/firmware/u280/gen3x16-xdma/base/scripts/create_xsabin.sh
bash -x opt/xilinx/firmware/u280/gen3x16-xdma/base/scripts/create_xsabin.sh xilinx-u280-gen3x16-xdma-base 1 3585717
set +x
'';
installPhase = ''
mkdir -p $out
cp -a * $out
'';
}

View File

@@ -0,0 +1,27 @@
--- a/driver/xocl/mgmtpf/Makefile 2025-02-20 15:59:28.379826176 +0100
+++ b/driver/xocl/mgmtpf/Makefile 2025-02-20 15:59:42.366892140 +0100
@@ -119,10 +119,6 @@ all:
install: all
$(MAKE) -C $(KERNEL_SRC) M=$(PWD) modules_install
- depmod -a
- install -m 644 99-xclmgmt.rules /etc/udev/rules.d
- -rmmod -s xclmgmt || true
- -modprobe xclmgmt
clean:
rm -rf *.o *.o.d *.o.cmd *~ core .depend .*.cmd *.ko *.ko.unsigned \
--- a/driver/xocl/userpf/Makefile 2025-02-20 16:03:20.751922522 +0100
+++ b/driver/xocl/userpf/Makefile 2025-02-20 16:03:35.377991553 +0100
@@ -138,11 +138,6 @@ all:
install: all
$(MAKE) -C $(KERNEL_SRC) M=$(PWD) modules_install
- depmod -a
- install -m 644 99-xocl.rules /etc/udev/rules.d
- -rmmod -s xocl || true
- -rmmod -s xdma || true
- -modprobe xocl
clean:
rm -rf *.o *.o.d *~ core .depend .*.cmd *.ko *.ko.unsigned *.mod.c \

35
m/raccoon/xilinx-xocl.nix Normal file
View File

@@ -0,0 +1,35 @@
{
stdenv
, lib
, kernel
, xilinx-xrt
}:
with lib;
# See: https://iotlab.sdsu.edu/index.php/flash-base-image-on-xilinx-alveo-u280/
stdenv.mkDerivation rec {
pname = "xilinx-xocl";
version = "2.19.0";
src = "${xilinx-xrt}/src/xrt-${version}";
dontStrip = true;
preBuild = ''
cd driver/xocl
'';
patches = [
./xilinx-xocl-depmod.patch
];
buildFlags = [ "KERNEL_SRC=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" ];
installFlags = [
"KERNEL_SRC=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
"INSTALL_MOD_PATH=${placeholder "out"}"
];
postInstall = ''
mkdir -p $out/etc/udev/rules.d
install -m 644 userpf/99-xocl.rules $out/etc/udev/rules.d
install -m 644 mgmtpf/99-xclmgmt.rules $out/etc/udev/rules.d
'';
nativeBuildInputs = kernel.moduleBuildDependencies;
hardeningDisable = [ "all" ];
}

View File

@@ -0,0 +1,25 @@
--- a/src/runtime_src/core/common/aiebu/src/cpp/aiebu/utils/asm/CMakeLists.txt
+++ b/src/runtime_src/core/common/aiebu/src/cpp/aiebu/utils/asm/CMakeLists.txt
@@ -23,8 +23,6 @@ add_executable(aiebu-asm $<TARGET_OBJECTS:aiebu_asm_objects>)
target_link_libraries(aiebu-asm PRIVATE aiebu_static)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
- target_link_options(aiebu-asm PRIVATE "-static")
- set_target_properties(aiebu-asm PROPERTIES INSTALL_RPATH "" BUILD_RPATH "")
# Create a dynamically linked executable. aiebu-asm-dyn, on Linux for running
# valgrind, etc. This binary is not released for deployment but only used for
@@ -35,13 +33,6 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
target_link_libraries(aiebu-asm-dyn PRIVATE aiebu_static)
endif()
-# This custom target fails if aiebu-asm has any dynamic dependencies
-add_custom_target(check_dynamic_deps ALL
- COMMAND ${CMAKE_COMMAND} -E echo "Checking for dynamic dependencies ..."
- COMMAND ${CMAKE_COMMAND} -P "${AIEBU_SOURCE_DIR}/cmake/depends.cmake" $<TARGET_FILE:aiebu-asm> aiebu-asm_depends.txt
- DEPENDS aiebu-asm
- )
-
install(TARGETS aiebu-asm
RUNTIME DESTINATION ${AIEBU_INSTALL_BIN_DIR}
CONFIGURATIONS Debug Release COMPONENT Runtime

View File

@@ -0,0 +1,13 @@
diff --git a/src/CMake/icd.cmake b/src/CMake/icd.cmake
index 255a2e3d8..460a6d4c7 100644
--- a/src/CMake/icd.cmake
+++ b/src/CMake/icd.cmake
@@ -10,7 +10,7 @@ configure_file (
${ICD_FILE_NAME}
)
-set(OCL_ICD_INSTALL_PREFIX "/etc/OpenCL/vendors")
+set(OCL_ICD_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}/etc/OpenCL/vendors")
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${ICD_FILE_NAME}
DESTINATION ${OCL_ICD_INSTALL_PREFIX}

View File

@@ -0,0 +1,204 @@
From 6f64871f2e679ad5d3b140c8a2732edaae2dcf6a Mon Sep 17 00:00:00 2001
From: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
Date: Thu, 20 Feb 2025 18:49:54 +0100
Subject: [PATCH] Revert "Removed support for u50lv, u55n and u280 platforms in
XRT (#7901)"
This reverts commit 41f4221433c6b173316b61cb2e7e3ee5152d8075.
---
.../core/pcie/driver/linux/xocl/devices.h | 103 ++++++++++++++++++
1 file changed, 103 insertions(+)
diff --git a/src/runtime_src/core/pcie/driver/linux/xocl/devices.h b/src/runtime_src/core/pcie/driver/linux/xocl/devices.h
index 971ad73d2..5fe329cfa 100644
--- a/src/runtime_src/core/pcie/driver/linux/xocl/devices.h
+++ b/src/runtime_src/core/pcie/driver/linux/xocl/devices.h
@@ -2064,6 +2064,14 @@ struct xocl_subdev_map {
.subdev_num = ARRAY_SIZE(USER_RES_DSA52), \
}
+#define XOCL_BOARD_USER_DSA52_U280 \
+ (struct xocl_board_private){ \
+ .flags = 0, \
+ .subdev_info = USER_RES_DSA52, \
+ .subdev_num = ARRAY_SIZE(USER_RES_DSA52), \
+ .p2p_bar_sz = 64, \
+ }
+
#define XOCL_BOARD_USER_SMARTN \
(struct xocl_board_private){ \
.flags = XOCL_DSAFLAG_SMARTN, \
@@ -2370,6 +2378,30 @@ struct xocl_subdev_map {
.flash_type = FLASH_TYPE_SPI, \
}
+
+#define MGMT_RES_XBB_DSA52_U280 \
+ ((struct xocl_subdev_info []) { \
+ XOCL_DEVINFO_FEATURE_ROM, \
+ XOCL_DEVINFO_PRP_IORES_MGMT, \
+ XOCL_DEVINFO_AXIGATE_ULP, \
+ XOCL_DEVINFO_CLOCK_HBM, \
+ XOCL_DEVINFO_AF_DSA52, \
+ XOCL_DEVINFO_XMC, \
+ XOCL_DEVINFO_XVC_PRI, \
+ XOCL_DEVINFO_MAILBOX_MGMT, \
+ XOCL_DEVINFO_ICAP_MGMT, \
+ XOCL_DEVINFO_FMGR, \
+ XOCL_DEVINFO_FLASH, \
+ })
+
+#define XOCL_BOARD_MGMT_XBB_DSA52_U280 \
+ (struct xocl_board_private){ \
+ .flags = 0, \
+ .subdev_info = MGMT_RES_XBB_DSA52_U280, \
+ .subdev_num = ARRAY_SIZE(MGMT_RES_XBB_DSA52_U280), \
+ .flash_type = FLASH_TYPE_SPI, \
+ }
+
#define MGMT_RES_XBB_SMARTN \
((struct xocl_subdev_info []) { \
XOCL_DEVINFO_FEATURE_ROM_SMARTN, \
@@ -2772,6 +2804,24 @@ struct xocl_subdev_map {
.board_name = "u50" \
}
+#define XOCL_BOARD_U55N_USER_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .board_name = "u55n", \
+ .subdev_info = RES_USER_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_USER_VSEC), \
+ }
+
+#define XOCL_BOARD_U55N_MGMT_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .subdev_info = RES_MGMT_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_MGMT_VSEC), \
+ .flash_type = FLASH_TYPE_SPI, \
+ .board_name = "u55n", \
+ .vbnv = "xilinx_u55n" \
+ }
+
#define XOCL_BOARD_U55C_USER_RAPTOR2 \
(struct xocl_board_private){ \
.flags = XOCL_DSAFLAG_DYNAMIC_IP, \
@@ -2790,6 +2840,24 @@ struct xocl_subdev_map {
.vbnv = "xilinx_u55c" \
}
+#define XOCL_BOARD_U50LV_USER_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .board_name = "u50lv", \
+ .subdev_info = RES_USER_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_USER_VSEC), \
+ }
+
+#define XOCL_BOARD_U50LV_MGMT_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .subdev_info = RES_MGMT_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_MGMT_VSEC), \
+ .flash_type = FLASH_TYPE_SPI, \
+ .board_name = "u50lv", \
+ .vbnv = "xilinx_u50lv" \
+ }
+
#define XOCL_BOARD_U50C_USER_RAPTOR2 \
(struct xocl_board_private){ \
.flags = XOCL_DSAFLAG_DYNAMIC_IP, \
@@ -2834,6 +2902,14 @@ struct xocl_subdev_map {
.p2p_bar_sz = 64, \
}
+#define XOCL_BOARD_U280_USER_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .subdev_info = RES_USER_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_USER_VSEC), \
+ .board_name = "u280", \
+ }
+
#define XOCL_BOARD_U250_MGMT_RAPTOR2 \
(struct xocl_board_private){ \
.flags = XOCL_DSAFLAG_DYNAMIC_IP, \
@@ -2843,6 +2919,15 @@ struct xocl_subdev_map {
.board_name = "u250" \
}
+#define XOCL_BOARD_U280_MGMT_RAPTOR2 \
+ (struct xocl_board_private){ \
+ .flags = XOCL_DSAFLAG_DYNAMIC_IP, \
+ .subdev_info = RES_MGMT_VSEC, \
+ .subdev_num = ARRAY_SIZE(RES_MGMT_VSEC), \
+ .flash_type = FLASH_TYPE_SPI, \
+ .board_name = "u280" \
+ }
+
#define XOCL_BOARD_VERSAL_USER_RAPTOR2 \
(struct xocl_board_private){ \
.flags = XOCL_DSAFLAG_DYNAMIC_IP | \
@@ -3435,6 +3520,8 @@ struct xocl_subdev_map {
{ XOCL_PCI_DEVID(0x10EE, 0x6A8F, 0x4353, MGMT_6A8F_DSA52) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5000, PCI_ANY_ID, MGMT_XBB_DSA52_U200) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5004, PCI_ANY_ID, MGMT_XBB_DSA52) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0x5008, PCI_ANY_ID, MGMT_XBB_DSA52_U280) },\
+ { XOCL_PCI_DEVID(0x10EE, 0x500C, PCI_ANY_ID, MGMT_XBB_DSA52_U280) },\
{ XOCL_PCI_DEVID(0x10EE, 0x5020, PCI_ANY_ID, MGMT_U50) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5028, PCI_ANY_ID, MGMT_VERSAL) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5044, PCI_ANY_ID, MGMT_VERSAL) }, \
@@ -3448,7 +3535,9 @@ struct xocl_subdev_map {
{ XOCL_PCI_DEVID(0x10EE, 0x5078, PCI_ANY_ID, VERSAL_MGMT_RAPTOR2) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5050, PCI_ANY_ID, MGMT_U25) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x504E, PCI_ANY_ID, U26Z_MGMT_RAPTOR2) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0x5058, PCI_ANY_ID, U55N_MGMT_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x505C, PCI_ANY_ID, U55C_MGMT_RAPTOR2) },\
+ { XOCL_PCI_DEVID(0x10EE, 0x5060, PCI_ANY_ID, U50LV_MGMT_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x506C, PCI_ANY_ID, U50C_MGMT_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x5074, PCI_ANY_ID, X3522PV_MGMT_RAPTOR2) }, \
{ XOCL_PCI_DEVID(0x13FE, 0x006C, PCI_ANY_ID, MGMT_6A8F) }, \
@@ -3457,6 +3546,8 @@ struct xocl_subdev_map {
{ XOCL_PCI_DEVID(0x10EE, 0xF987, PCI_ANY_ID, XBB_MFG("samsung_efuse")) },\
{ XOCL_PCI_DEVID(0x10EE, 0xD000, PCI_ANY_ID, XBB_MFG("u200")) },\
{ XOCL_PCI_DEVID(0x10EE, 0xD004, PCI_ANY_ID, XBB_MFG("u250")) },\
+ { XOCL_PCI_DEVID(0x10EE, 0xD008, PCI_ANY_ID, XBB_MFG("u280-es1")) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0xD00C, PCI_ANY_ID, XBB_MFG("u280")) },\
{ XOCL_PCI_DEVID(0x10EE, 0xD030, PCI_ANY_ID, XBB_MFG("poc1465")) },\
{ XOCL_PCI_DEVID(0x10EE, 0xD020, PCI_ANY_ID, XBB_MFG_U50) }, \
{ XOCL_PCI_DEVID(0x10EE, 0xD03C, PCI_ANY_ID, XBB_MFG_U30) }, \
@@ -3495,11 +3586,15 @@ struct xocl_subdev_map {
{ XOCL_PCI_DEVID(0x10EE, 0x7990, 0x4352, USER_DSA52) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5001, PCI_ANY_ID, USER_DSA52) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5005, PCI_ANY_ID, USER_DSA52) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0x5009, PCI_ANY_ID, USER_DSA52_U280) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0x500D, PCI_ANY_ID, USER_DSA52_U280) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5021, PCI_ANY_ID, USER_U50) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x5051, PCI_ANY_ID, USER_U25) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x504F, PCI_ANY_ID, U26Z_USER_RAPTOR2) }, \
{ XOCL_PCI_DEVID(0x10EE, 0x513D, PCI_ANY_ID, U30_USER_RAPTOR2) }, \
+ { XOCL_PCI_DEVID(0x10EE, 0x5059, PCI_ANY_ID, U55N_USER_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x505D, PCI_ANY_ID, U55C_USER_RAPTOR2) },\
+ { XOCL_PCI_DEVID(0x10EE, 0x5061, PCI_ANY_ID, U50LV_USER_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x506D, PCI_ANY_ID, U50C_USER_RAPTOR2) },\
{ XOCL_PCI_DEVID(0x10EE, 0x5075, PCI_ANY_ID, X3522PV_USER_RAPTOR2) }, \
{ XOCL_PCI_DEVID(0x13FE, 0x0065, PCI_ANY_ID, USER_XDMA) }, \
@@ -3561,6 +3656,14 @@ struct xocl_subdev_map {
.vbnv = "xilinx_u250", \
.priv_data = &XOCL_BOARD_U250_MGMT_RAPTOR2, \
.type = XOCL_DSAMAP_RAPTOR2 }, \
+ { 0x10EE, 0x500D, PCI_ANY_ID, \
+ .vbnv = "xilinx_u280", \
+ .priv_data = &XOCL_BOARD_U280_USER_RAPTOR2, \
+ .type = XOCL_DSAMAP_RAPTOR2 }, \
+ { 0x10EE, 0x500C, PCI_ANY_ID, \
+ .vbnv = "xilinx_u280", \
+ .priv_data = &XOCL_BOARD_U280_MGMT_RAPTOR2, \
+ .type = XOCL_DSAMAP_RAPTOR2 }, \
{ 0x10EE, 0x5020, PCI_ANY_ID, \
.vbnv = "xilinx_u50", \
.priv_data = &XOCL_BOARD_U50_MGMT_RAPTOR2, \
--
2.45.2

74
m/raccoon/xilinx-xrt.nix Normal file
View File

@@ -0,0 +1,74 @@
{
stdenv
, fetchFromGitHub
, enableDebug ? false
, lib
, cmake
, pkg-config
, libdrm
, libelf
, opencl-headers
, ocl-icd
, git
, boost
, ncurses
, openssl
, rapidjson
, protobuf
, python3
, libuuid
, curl
, libsystemtap
, libxcrypt
, udev
}:
with lib;
stdenv.mkDerivation rec {
name = "xilinx-xrt";
version = "dc81a9cc";
src = fetchFromGitHub {
owner = "Xilinx";
repo = "XRT";
rev = "dc81a9cc852bf44e71aa3edde7c8f7d54f355eab";
hash = "sha256-SG1gIO8Bvgs5XQ7HswjWNavPH+m8xHXqauztuJa6aEo=";
fetchSubmodules = true;
};
dontStrip = true;
patches = [
./xilinx-xrt-aiebu.patch
./xilinx-xrt-icd.patch
./xilinx-xrt-u280-support.patch
];
cmakeFlags = [
"-DXRT_INSTALL_PREFIX=${placeholder "out"}"
"-DXRT_INSTALL_DIR=${placeholder "out"}"
"-DXRT_NATIVE_BUILD=yes"
"-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
# Enable debug
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
#"-DCMAKE_BUILD_TYPE=Debug"
#"-DXOCL_VERBOSE=1"
#"-DXRT_VERBOSE=1"
];
# A directory named "build" already exists
cmakeBuildDir = "the-build";
# Replace all occurences of /usr to $out, although some are not correct. By
# default they are replaced by /var/empty
dontFixCmake = true;
preConfigure = ''
find "." -type f \( -name "*.cmake" -o -name "*.cmake.in" -o -name CMakeLists.txt \) -print |
while read fn; do
sed -e 's^/usr\([ /]\|$\)^'$out'\1^g' -e 's^/opt\([ /]\|$\)^'$out'\1^g' < "$fn" > "$fn.tmp"
mv "$fn.tmp" "$fn"
done
'';
nativeBuildInputs = [ cmake pkg-config git ];
buildInputs = [ libdrm.dev opencl-headers ocl-icd boost.dev ncurses
openssl.dev rapidjson protobuf python3 libelf libuuid.dev curl.dev
libsystemtap libxcrypt udev.out udev.dev
];
hardeningDisable = [ "all" ];
}

View File

@@ -1,14 +0,0 @@
modules:
http_2xx:
prober: http
timeout: 5s
http:
preferred_ip_protocol: "ip4"
follow_redirects: true
valid_status_codes: [] # Defaults to 2xx
method: GET
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"

View File

@@ -1,85 +0,0 @@
{ config, pkgs, lib, ... }:
{
imports = [
../common/xeon.nix
../common/ssf/hosts.nix
../module/emulation.nix
../module/debuginfod.nix
./monitoring.nix
./nginx.nix
./nix-serve.nix
./gitlab-runner.nix
./gitea.nix
../hut/public-inbox.nix
../hut/msmtp.nix
../module/p.nix
../module/vpn-dac.nix
../module/hut-substituter.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
networking = {
hostName = "tent";
interfaces.eno1.ipv4.addresses = [
{
address = "10.0.44.4";
prefixLength = 24;
}
];
# Only BSC DNSs seem to be reachable from the office VLAN
nameservers = [ "84.88.52.35" "84.88.52.36" ];
search = [ "bsc.es" "ac.upc.edu" ];
defaultGateway = "10.0.44.1";
hosts = {
"84.88.53.236" = [ "apex" ];
"10.0.44.1" = [ "raccoon" ];
};
};
services.p.enable = true;
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
boot.swraid = {
enable = true;
mdadmConf = ''
DEVICE partitions
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
MAILADDR root
'';
};
fileSystems."/vault" = {
device = "/dev/disk/by-label/vault";
fsType = "ext4";
};
# Make a /vault/$USER directory for each user.
systemd.services.create-vault-dirs = let
# Take only normal users in tent
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}"
]) users);
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
# disable automatic garbage collector
nix.gc.automatic = lib.mkForce false;
}

View File

@@ -1,30 +0,0 @@
{ config, lib, ... }:
{
services.gitea = {
enable = true;
appName = "Gitea in the jungle";
settings = {
server = {
ROOT_URL = "https://jungle.bsc.es/git/";
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
LANDING_PAGE = "explore";
};
metrics.ENABLED = true;
service = {
DISABLE_REGISTRATION = true;
REGISTER_MANUAL_CONFIRM = true;
ENABLE_NOTIFY_MAIL = true;
};
log.LEVEL = "Warn";
mailer = {
ENABLED = true;
FROM = "jungle-robot@bsc.es";
PROTOCOL = "sendmail";
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
SENDMAIL_ARGS = "--";
};
};
};
}

View File

@@ -1,93 +0,0 @@
{ pkgs, lib, config, ... }:
{
age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age;
age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age;
age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age;
services.gitlab-runner = let sec = config.age.secrets; in {
enable = true;
settings.concurrent = 5;
services = {
# For gitlab.pm.bsc.es
gitlab-pm-shell = {
executor = "shell";
environmentVariables = {
SHELL = "${pkgs.bash}/bin/bash";
};
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path;
preGetSourcesScript = pkgs.writeScript "setup" ''
echo "This is the preGetSources script running, brace for impact"
env
'';
};
gitlab-pm-docker = {
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path;
executor = "docker";
dockerImage = "debian:stable";
};
# For gitlab.bsc.es
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path;
tagList = [ "docker" "tent" "nix" ];
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerDisableCache = true;
registrationFlags = [
# Increase build log length to 64 MiB
"--output-limit 65536"
];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = {
ENV = "/etc/profile";
USER = "root";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
};
};
};
};
systemd.services.gitlab-runner.serviceConfig = {
DynamicUser = lib.mkForce false;
User = "gitlab-runner";
Group = "gitlab-runner";
ExecStart = lib.mkForce
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
};
users.users.gitlab-runner = {
uid = config.ids.uids.gitlab-runner;
home = "/var/lib/gitlab-runner";
description = "Gitlab Runner";
group = "gitlab-runner";
extraGroups = [ "docker" ];
createHome = true;
};
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
}

View File

@@ -1,217 +0,0 @@
{ config, lib, pkgs, ... }:
{
imports = [
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
../module/nix-daemon-exporter.nix
];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
owner = "grafana";
mode = "400";
};
services.grafana = {
enable = true;
settings = {
server = {
domain = "jungle.bsc.es";
root_url = "%(protocol)s://%(domain)s/grafana";
serve_from_sub_path = true;
http_port = 2342;
http_addr = "127.0.0.1";
};
smtp = {
enabled = true;
from_address = "jungle-robot@bsc.es";
user = "jungle-robot";
# Read the password from a file, which is only readable by grafana user
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
host = "mail.bsc.es:465";
startTLS_policy = "NoStartTLS";
};
feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
};
};
services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
listenAddress = "127.0.0.1";
};
# We need access to the devices to monitor the disk space
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
# Credentials for IPMI exporter
age.secrets.ipmiYml = {
file = ../../secrets/ipmi.yml.age;
owner = "ipmi-exporter";
};
# Create an IPMI group and assign the ipmi0 device
users.groups.ipmi = {};
services.udev.extraRules = ''
SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660"
'';
# Add a new ipmi-exporter user that can read the ipmi0 device
users.users.ipmi-exporter = {
isSystemUser = true;
group = "ipmi";
};
# Disable dynamic user so we have the ipmi-exporter user available for the credentials
systemd.services.prometheus-ipmi-exporter.serviceConfig = {
DynamicUser = lib.mkForce false;
PrivateDevices = lib.mkForce false;
User = lib.mkForce "ipmi-exporter";
Group = lib.mkForce "ipmi";
RestrictNamespaces = lib.mkForce false;
# Fake uid to 0 so it shuts up
ExecStart = let
cfg = config.services.prometheus.exporters.ipmi;
in lib.mkForce (lib.concatStringsSep " " ([
"${pkgs.util-linux}/bin/unshare --map-user 0"
"${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter"
"--web.listen-address ${cfg.listenAddress}:${toString cfg.port}"
"--config.file ${lib.escapeShellArg cfg.configFile}"
] ++ cfg.extraFlags));
};
services.prometheus = {
exporters = {
ipmi = {
enable = true;
configFile = config.age.secrets.ipmiYml.path;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "logind" ];
port = 9002;
listenAddress = "127.0.0.1";
};
blackbox = {
enable = true;
listenAddress = "127.0.0.1";
configFile = ./blackbox.yml;
};
};
scrapeConfigs = [
{
job_name = "local";
static_configs = [{
targets = [
"127.0.0.1:9002" # Node exporter
#"127.0.0.1:9115" # Blackbox exporter
"127.0.0.1:9290" # IPMI exporter for local node
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
];
}];
}
{
job_name = "blackbox-http";
metrics_path = "/probe";
params = { module = [ "http_2xx" ]; };
static_configs = [{
targets = [
"https://www.google.com/robots.txt"
"https://pm.bsc.es/"
"https://pm.bsc.es/gitlab/"
"https://jungle.bsc.es/"
"https://gitlab.bsc.es/"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "blackbox-icmp";
metrics_path = "/probe";
params = { module = [ "icmp" ]; };
static_configs = [{
targets = [
"1.1.1.1"
"8.8.8.8"
"ssfhead"
"raccoon"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"fox-ipmi.ac.upc.edu"
"arenys5.ac.upc.edu"
"arenys0-2.ac.upc.edu"
"epi01.bsc.es"
"axle.bsc.es"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "ipmi-raccoon";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "raccoon-ipmi" ];
module = [ "raccoon" ];
};
}
{
job_name = "ipmi-fox";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "fox-ipmi.ac.upc.edu" ];
module = [ "fox" ];
};
}
];
};
}

View File

@@ -1,79 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /vault/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /var/lib/p/;
}
location /pub/ {
alias /vault/pub/;
}
'';
};
};
}

View File

@@ -1,16 +0,0 @@
{ config, ... }:
{
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
services.nix-serve = {
enable = true;
# Only listen locally, as we serve it via ssh
bindAddress = "127.0.0.1";
port = 5000;
secretKeyFile = config.age.secrets.nixServe.path;
# Public key:
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
};
}

View File

@@ -1,33 +0,0 @@
{ lib, ... }:
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
];
# Select this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
# Users with sudo access
users.groups.wheel.members = [ "abonerib" "anavarro" ];
# Run julia installed with juliaup using julia's own libraries:
# NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
programs.nix-ld.enable = true;
networking = {
hostName = "weasel";
interfaces.eno1.ipv4.addresses = [ {
address = "10.0.40.6";
prefixLength = 24;
} ];
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.6";
prefixLength = 24;
} ];
};
}

View File

@@ -1,89 +0,0 @@
{ stdenv
, lib
, curl
, cacert
, runCommandLocal
, autoPatchelfHook
, elfutils
, glib
, libGL
, ncurses5
, xorg
, zlib
, libxkbcommon
, freetype
, fontconfig
, libGLU
, dbus
, rocmPackages
, libxcrypt-legacy
, numactl
, radare2
}:
let
version = "5.1.701";
tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";
# NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes.
uprofSrc = runCommandLocal tarball {
nativeBuildInputs = [ curl ];
outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
} ''
curl \
-o $out \
'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
-H 'Accept-Language: en-US,en;q=0.5' \
-H 'Accept-Encoding: gzip, deflate, br, zstd' \
-H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
'';
in
stdenv.mkDerivation {
pname = "AMD-uProf";
inherit version;
src = uprofSrc;
dontStrip = true;
phases = [ "installPhase" "fixupPhase" ];
nativeBuildInputs = [ autoPatchelfHook radare2 ];
buildInputs = [
stdenv.cc.cc.lib
ncurses5
elfutils
glib
libGL
libGLU
libxcrypt-legacy
xorg.libX11
xorg.libXext
xorg.libXi
xorg.libXmu
xorg.libxcb
xorg.xcbutilwm
xorg.xcbutilrenderutil
xorg.xcbutilkeysyms
xorg.xcbutilimage
fontconfig.lib
libxkbcommon
zlib
freetype
dbus
rocmPackages.rocprofiler
numactl
];
installPhase = ''
set -x
mkdir -p $out
tar -x -v -C $out --strip-components=1 -f $src
rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c -
radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm
patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm
set +x
'';
}

View File

@@ -1,33 +0,0 @@
{ stdenv
, lib
, amd-uprof
, kernel
, runCommandLocal
}:
let
version = amd-uprof.version;
tarball = amd-uprof.src;
in stdenv.mkDerivation {
pname = "AMDPowerProfilerDriver";
inherit version;
src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
set -x
tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz
mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out
set +x
'';
hardeningDisable = [ "pic" "format" ];
nativeBuildInputs = kernel.moduleBuildDependencies;
patches = [ ./makefile.patch ./hrtimer.patch ];
makeFlags = [
"KERNEL_VERSION=${kernel.modDirVersion}"
"KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
"INSTALL_MOD_PATH=$(out)"
];
meta = {
description = "AMD Power Profiler Driver";
homepage = "https://www.amd.com/es/developer/uprof.html";
platforms = lib.platforms.linux;
};
}

View File

@@ -1,31 +0,0 @@
--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200
+++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200
@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
DRVPRINT("pTimerConfig(%p)", pTimerConfig);
- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
}
int PmcSetupTimer(ClientContext* pClientCtx)
@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
{
/* Interval in ms */
pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
- pTimerConfig->m_hrTimer.function = PmcTimerCallback;
DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
}
--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200
+++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200
@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
pCoreClientData = &per_cpu(g_coreClientData, cpu);
// initialize HR timer
- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- pCoreClientData->m_hrTimer.function = &HrTimerCallback;
+ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
return;
} // InitHrTimer

View File

@@ -1,10 +0,0 @@
# Patch arguments to call sym std::string::find(char const*, unsigned long, unsigned long)
# so it matches NixOS:
#
# Change OS name to NixOS
wz NixOS @ 0x00550a43
# And set the length to 5 characters
wa mov ecx, 5 @0x00517930
#
# Then change the argument to dlopen() so it only uses libnuma.so
wz libnuma.so @ 0x00562940

View File

@@ -1,66 +0,0 @@
--- a/Makefile 2025-06-19 20:36:49.346693267 +0200
+++ b/Makefile 2025-06-19 20:42:29.778088660 +0200
@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
MODULE_NAME_KO=$(MODULE_NAME).ko
# check is module inserted
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
# check pcore dkms status
PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
@@ -50,7 +50,7 @@ endif
# “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
G_VERSION=9
K_VERSION=5
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe
# make
all:
- @chmod a+x ./AMDPPcert.sh
- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS1 -eq 1 ]; then \
- exit 1; \
- fi
- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS3 -eq 0 ]; then \
- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
- fi
+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
# make clean
clean:
@@ -84,23 +74,9 @@ clean:
# make install
install:
- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
- @depmod -a
- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
- rmmod $(MODULE_NAME);\
- fi
- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
- if [ $$SIGSTATUS1 -eq 0 ]; then \
- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
- exit 1; \
- else \
- cat $(PWD)/sign_status1; \
- fi
+ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
+ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
+
# make dkms
dkms:
@chmod a+x ./AMDPPcert.sh

View File

@@ -1,12 +0,0 @@
HOSTCXX ?= g++
NVCC := nvcc -ccbin $(HOSTCXX)
CXXFLAGS := -m64
# Target rules
all: cudainfo
cudainfo: cudainfo.cpp
$(NVCC) $(CXXFLAGS) -o $@ $<
clean:
rm -f cudainfo cudainfo.o

View File

@@ -1,600 +0,0 @@
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
// Shared Utilities (QA Testing)
// std::system includes
#include <memory>
#include <iostream>
#include <cuda_runtime.h>
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
static const char *_cudaGetErrorEnum(cudaError_t error)
{
switch (error)
{
case cudaSuccess:
return "cudaSuccess";
case cudaErrorMissingConfiguration:
return "cudaErrorMissingConfiguration";
case cudaErrorMemoryAllocation:
return "cudaErrorMemoryAllocation";
case cudaErrorInitializationError:
return "cudaErrorInitializationError";
case cudaErrorLaunchFailure:
return "cudaErrorLaunchFailure";
case cudaErrorPriorLaunchFailure:
return "cudaErrorPriorLaunchFailure";
case cudaErrorLaunchTimeout:
return "cudaErrorLaunchTimeout";
case cudaErrorLaunchOutOfResources:
return "cudaErrorLaunchOutOfResources";
case cudaErrorInvalidDeviceFunction:
return "cudaErrorInvalidDeviceFunction";
case cudaErrorInvalidConfiguration:
return "cudaErrorInvalidConfiguration";
case cudaErrorInvalidDevice:
return "cudaErrorInvalidDevice";
case cudaErrorInvalidValue:
return "cudaErrorInvalidValue";
case cudaErrorInvalidPitchValue:
return "cudaErrorInvalidPitchValue";
case cudaErrorInvalidSymbol:
return "cudaErrorInvalidSymbol";
case cudaErrorMapBufferObjectFailed:
return "cudaErrorMapBufferObjectFailed";
case cudaErrorUnmapBufferObjectFailed:
return "cudaErrorUnmapBufferObjectFailed";
case cudaErrorInvalidHostPointer:
return "cudaErrorInvalidHostPointer";
case cudaErrorInvalidDevicePointer:
return "cudaErrorInvalidDevicePointer";
case cudaErrorInvalidTexture:
return "cudaErrorInvalidTexture";
case cudaErrorInvalidTextureBinding:
return "cudaErrorInvalidTextureBinding";
case cudaErrorInvalidChannelDescriptor:
return "cudaErrorInvalidChannelDescriptor";
case cudaErrorInvalidMemcpyDirection:
return "cudaErrorInvalidMemcpyDirection";
case cudaErrorAddressOfConstant:
return "cudaErrorAddressOfConstant";
case cudaErrorTextureFetchFailed:
return "cudaErrorTextureFetchFailed";
case cudaErrorTextureNotBound:
return "cudaErrorTextureNotBound";
case cudaErrorSynchronizationError:
return "cudaErrorSynchronizationError";
case cudaErrorInvalidFilterSetting:
return "cudaErrorInvalidFilterSetting";
case cudaErrorInvalidNormSetting:
return "cudaErrorInvalidNormSetting";
case cudaErrorMixedDeviceExecution:
return "cudaErrorMixedDeviceExecution";
case cudaErrorCudartUnloading:
return "cudaErrorCudartUnloading";
case cudaErrorUnknown:
return "cudaErrorUnknown";
case cudaErrorNotYetImplemented:
return "cudaErrorNotYetImplemented";
case cudaErrorMemoryValueTooLarge:
return "cudaErrorMemoryValueTooLarge";
case cudaErrorInvalidResourceHandle:
return "cudaErrorInvalidResourceHandle";
case cudaErrorNotReady:
return "cudaErrorNotReady";
case cudaErrorInsufficientDriver:
return "cudaErrorInsufficientDriver";
case cudaErrorSetOnActiveProcess:
return "cudaErrorSetOnActiveProcess";
case cudaErrorInvalidSurface:
return "cudaErrorInvalidSurface";
case cudaErrorNoDevice:
return "cudaErrorNoDevice";
case cudaErrorECCUncorrectable:
return "cudaErrorECCUncorrectable";
case cudaErrorSharedObjectSymbolNotFound:
return "cudaErrorSharedObjectSymbolNotFound";
case cudaErrorSharedObjectInitFailed:
return "cudaErrorSharedObjectInitFailed";
case cudaErrorUnsupportedLimit:
return "cudaErrorUnsupportedLimit";
case cudaErrorDuplicateVariableName:
return "cudaErrorDuplicateVariableName";
case cudaErrorDuplicateTextureName:
return "cudaErrorDuplicateTextureName";
case cudaErrorDuplicateSurfaceName:
return "cudaErrorDuplicateSurfaceName";
case cudaErrorDevicesUnavailable:
return "cudaErrorDevicesUnavailable";
case cudaErrorInvalidKernelImage:
return "cudaErrorInvalidKernelImage";
case cudaErrorNoKernelImageForDevice:
return "cudaErrorNoKernelImageForDevice";
case cudaErrorIncompatibleDriverContext:
return "cudaErrorIncompatibleDriverContext";
case cudaErrorPeerAccessAlreadyEnabled:
return "cudaErrorPeerAccessAlreadyEnabled";
case cudaErrorPeerAccessNotEnabled:
return "cudaErrorPeerAccessNotEnabled";
case cudaErrorDeviceAlreadyInUse:
return "cudaErrorDeviceAlreadyInUse";
case cudaErrorProfilerDisabled:
return "cudaErrorProfilerDisabled";
case cudaErrorProfilerNotInitialized:
return "cudaErrorProfilerNotInitialized";
case cudaErrorProfilerAlreadyStarted:
return "cudaErrorProfilerAlreadyStarted";
case cudaErrorProfilerAlreadyStopped:
return "cudaErrorProfilerAlreadyStopped";
/* Since CUDA 4.0*/
case cudaErrorAssert:
return "cudaErrorAssert";
case cudaErrorTooManyPeers:
return "cudaErrorTooManyPeers";
case cudaErrorHostMemoryAlreadyRegistered:
return "cudaErrorHostMemoryAlreadyRegistered";
case cudaErrorHostMemoryNotRegistered:
return "cudaErrorHostMemoryNotRegistered";
/* Since CUDA 5.0 */
case cudaErrorOperatingSystem:
return "cudaErrorOperatingSystem";
case cudaErrorPeerAccessUnsupported:
return "cudaErrorPeerAccessUnsupported";
case cudaErrorLaunchMaxDepthExceeded:
return "cudaErrorLaunchMaxDepthExceeded";
case cudaErrorLaunchFileScopedTex:
return "cudaErrorLaunchFileScopedTex";
case cudaErrorLaunchFileScopedSurf:
return "cudaErrorLaunchFileScopedSurf";
case cudaErrorSyncDepthExceeded:
return "cudaErrorSyncDepthExceeded";
case cudaErrorLaunchPendingCountExceeded:
return "cudaErrorLaunchPendingCountExceeded";
case cudaErrorNotPermitted:
return "cudaErrorNotPermitted";
case cudaErrorNotSupported:
return "cudaErrorNotSupported";
/* Since CUDA 6.0 */
case cudaErrorHardwareStackError:
return "cudaErrorHardwareStackError";
case cudaErrorIllegalInstruction:
return "cudaErrorIllegalInstruction";
case cudaErrorMisalignedAddress:
return "cudaErrorMisalignedAddress";
case cudaErrorInvalidAddressSpace:
return "cudaErrorInvalidAddressSpace";
case cudaErrorInvalidPc:
return "cudaErrorInvalidPc";
case cudaErrorIllegalAddress:
return "cudaErrorIllegalAddress";
/* Since CUDA 6.5*/
case cudaErrorInvalidPtx:
return "cudaErrorInvalidPtx";
case cudaErrorInvalidGraphicsContext:
return "cudaErrorInvalidGraphicsContext";
case cudaErrorStartupFailure:
return "cudaErrorStartupFailure";
case cudaErrorApiFailureBase:
return "cudaErrorApiFailureBase";
}
return "<unknown>";
}
#endif
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
cudaDeviceReset();
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
int *pArgc = NULL;
char **pArgv = NULL;
#if CUDART_VERSION < 5000
// CUDA-C includes
#include <cuda.h>
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
if (CUDA_SUCCESS != error) {
fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
error, __FILE__, __LINE__);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(EXIT_FAILURE);
}
}
#endif /* CUDART_VERSION < 5000 */
// Beginning of GPU Architecture definitions
inline int ConvertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
pArgc = &argc;
pArgv = argv;
printf("%s Starting...\n\n", argv[0]);
printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
int deviceCount = 0;
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
if (error_id != cudaSuccess) {
printf("cudaGetDeviceCount failed: %s (%d)\n",
cudaGetErrorString(error_id), (int) error_id);
printf("Result = FAIL\n");
exit(EXIT_FAILURE);
}
// This function call returns 0 if there are no CUDA capable devices.
if (deviceCount == 0)
printf("There are no available device(s) that support CUDA\n");
else
printf("Detected %d CUDA Capable device(s)\n", deviceCount);
int dev, driverVersion = 0, runtimeVersion = 0;
for (dev = 0; dev < deviceCount; ++dev) {
cudaSetDevice(dev);
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
// Console log
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
(float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
deviceProp.multiProcessorCount,
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 5000
// This is supported in CUDA 5.0 (runtime API device properties)
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
if (deviceProp.l2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
}
#else
// This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
int memoryClock;
getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
int memBusWidth;
getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
int L2CacheSize;
getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
if (L2CacheSize) {
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
}
#endif
printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
printf(" Warp size: %d\n", deviceProp.warpSize);
printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
#endif
printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
const char *sComputeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
"Unknown",
NULL
};
printf(" Compute Mode:\n");
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
}
// If there are 2 or more GPUs, query to determine whether RDMA is supported
if (deviceCount >= 2)
{
cudaDeviceProp prop[64];
int gpuid[64]; // we want to find the first two GPU's that can support P2P
int gpu_p2p_count = 0;
for (int i=0; i < deviceCount; i++)
{
checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
// Only boards based on Fermi or later can support P2P
if ((prop[i].major >= 2)
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
&& prop[i].tccDriver
#endif
)
{
// This is an array of P2P capable GPUs
gpuid[gpu_p2p_count++] = i;
}
}
// Show all the combinations of support P2P GPUs
int can_access_peer_0_1, can_access_peer_1_0;
if (gpu_p2p_count >= 2)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
for (int j = 1; j < gpu_p2p_count; j++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
prop[gpuid[j]].name, gpuid[j] ,
can_access_peer_0_1 ? "Yes" : "No");
}
}
for (int j = 1; j < gpu_p2p_count; j++)
{
for (int i = 0; i < gpu_p2p_count-1; i++)
{
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
prop[gpuid[i]].name, gpuid[i] ,
can_access_peer_1_0 ? "Yes" : "No");
}
}
}
}
// csv masterlog info
// *****************************
// exe and CUDA driver name
printf("\n");
std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
char cTemp[128];
// driver version
sProfileString += ", CUDA Driver Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
#endif
sProfileString += cTemp;
// Runtime version
sProfileString += ", CUDA Runtime Version = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#else
sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
#endif
sProfileString += cTemp;
// Device count
sProfileString += ", NumDevs = ";
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 10, "%d", deviceCount);
#else
sprintf(cTemp, "%d", deviceCount);
#endif
sProfileString += cTemp;
// Print Out all device Names
for (dev = 0; dev < deviceCount; ++dev)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
sprintf_s(cTemp, 13, ", Device%d = ", dev);
#else
sprintf(cTemp, ", Device%d = ", dev);
#endif
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
sProfileString += cTemp;
sProfileString += deviceProp.name;
}
sProfileString += "\n";
printf("%s", sProfileString.c_str());
printf("Result = PASS\n");
// finish
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
return 0;
}

View File

@@ -1,43 +0,0 @@
{
stdenv
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
, strace
}:
stdenv.mkDerivation (finalAttrs: {
name = "cudainfo";
src = ./.;
buildInputs = [
cudatoolkit # Required for nvcc
cudaPackages.cuda_cudart.static # Required for -lcudart_static
autoAddDriverRunpath
];
installPhase = ''
mkdir -p $out/bin
cp -a cudainfo $out/bin
'';
passthru.gpuCheck = stdenv.mkDerivation {
name = "cudainfo-test";
requiredSystemFeatures = [ "cuda" ];
dontBuild = true;
nativeCheckInputs = [
finalAttrs.finalPackage # The cudainfo package from above
strace # When it fails, it will show the trace
];
dontUnpack = true;
doCheck = true;
checkPhase = ''
if ! cudainfo; then
set -x
cudainfo=$(command -v cudainfo)
ldd $cudainfo
readelf -d $cudainfo
strace -f $cudainfo
set +x
fi
'';
installPhase = "touch $out";
};
})

View File

@@ -1,25 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "meteocat-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
beautifulsoup4
lxml
prometheus-client
];
meta = with lib; {
description = "MeteoCat Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
from bs4 import BeautifulSoup
from urllib import request
# Configuration -------------------------------------------
meteo_station = "X8" # Barcelona - Zona Universitària
listening_port = 9929
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
def update(st):
url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
response = request.urlopen(url)
data = response.read()
soup = BeautifulSoup(data, 'lxml')
table = soup.find("table", {"class" : "tblperiode"})
rows = table.find_all('tr')
row = rows[-1] # Take the last row
row_data = []
header = row.find('th')
header_text = header.text.strip()
row_data.append(header_text)
for col in row.find_all('td'):
row_data.append(col.text)
try:
# Sometimes it will return '(s/d)' and fail to parse
metric_tavg.set(float(row_data[1]))
metric_tmax.set(float(row_data[2]))
metric_tmin.set(float(row_data[3]))
metric_srad.set(float(row_data[10]))
#print("ok: temp_avg={}".format(float(row_data[1])))
except:
print("cannot parse row: {}".format(row))
metric_tavg.set(float("nan"))
metric_tmax.set(float("nan"))
metric_tmin.set(float("nan"))
metric_srad.set(float("nan"))
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
update(meteo_station)
except:
print("update failed")
time.sleep(update_period)

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='meteocat-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["meteocat-exporter"],
)

View File

@@ -0,0 +1,36 @@
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
index 33e88bc..ee3641c 100644
--- a/src/util/mpir_hwtopo.c
+++ b/src/util/mpir_hwtopo.c
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
#ifdef HAVE_HWLOC
bindset = hwloc_bitmap_alloc();
hwloc_topology_init(&hwloc_topology);
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
- if (xmlfile != NULL) {
- int rc;
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
- if (rc == 0) {
- /* To have hwloc still actually call OS-specific hooks, the
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
- * file is really the underlying system. */
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
- }
- MPL_free(xmlfile);
- }
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
if (!hwloc_topology_load(hwloc_topology))
--- a/src/mpi/init/local_proc_attrs.c
+++ b/src/mpi/init/local_proc_attrs.c
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
/* Set the number of tag bits. The device may override this value. */
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
- MPL_free(requested_kinds);
-
return mpi_errno;
}

View File

@@ -11,6 +11,10 @@ final: prev:
paths = [ pmix.dev pmix.out ];
};
in prev.mpich.overrideAttrs (old: {
patches = [
# See https://github.com/pmodels/mpich/issues/6946
./mpich-fix-hwtopo.patch
];
buildInput = old.buildInputs ++ [
libfabric
pmixAll
@@ -35,33 +39,7 @@ final: prev:
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
./slurm-rank-expansion.patch
];
# Install also the pam_slurm_adopt library to restrict users from accessing
# nodes with no job allocated.
postBuild = (old.postBuild or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security"
popd
'';
postInstall = (old.postInstall or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security" install
popd
'';
});
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
cudainfo = prev.callPackage ./cudainfo/default.nix { };
amd-uprof = prev.callPackage ./amd-uprof/default.nix { };
# FIXME: Extend this to all linuxPackages variants. Open problem, see:
# https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
linuxPackages = prev.linuxPackages.extend (_final: _prev: {
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
});
linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
});
}

View File

@@ -1,24 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "upc-qaire-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
prometheus-client
requests
];
meta = with lib; {
description = "UPC Qaire Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='upc-qaire-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["upc-qaire-exporter"],
)

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
import requests, json
from datetime import datetime, timedelta
# Configuration -------------------------------------------
listening_port = 9928
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
def genparams():
d = {}
d['topic'] = 'TEMPERATURE'
d['shift_dates_to'] = ''
d['datapoints'] = 301
d['devicesAndColors'] = '1148418@@@#40ACB6'
now = datetime.now()
d['fromDate'] = now.strftime('%d/%m/%Y')
d['toDate'] = now.strftime('%d/%m/%Y')
d['serviceFrequency'] = 'NONE'
# WTF!
for i in range(7):
for j in range(48):
key = 'week.days[{}].hours[{}].value'.format(i, j)
d[key] = 'OPEN'
return d
def measure():
# First we need to load session
s = requests.Session()
r = s.get("https://upc.edu/sirena")
if r.status_code != 200:
print("bad HTTP status code on new session: {}".format(r.status_code))
return
if s.cookies.get("JSESSIONID") is None:
print("cannot get JSESSIONID")
return
# Now we can pull the data
url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
r = s.post(url, data=genparams())
if r.status_code != 200:
print("bad HTTP status code on data: {}".format(r.status_code))
return
#print(r.text)
j = json.loads(r.content)
# Just take the last one
last = j['data']['chartElementList'][-1]
temp = last['values']['1148418-Temperatura']
return temp
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
metric_temp.set(measure())
except:
print("measure failed")
metric_temp.set(float("nan"))
time.sleep(update_period)

View File

@@ -1,25 +1,21 @@
age-encryption.org/v1
-> ssh-ed25519 AY8zKw /gmhFOFqOs8IobAImvQVKeM5Y6k0FpuR61/Cu5drVVI
g9FXJg2oIoien0zJ70FWHwSTM8SBwbpS188S3Swj7EM
-> ssh-ed25519 sgAamA opPjlWPhSiI0Rd5l7kd204S5FXFLcQcQftyKb7MDmnU
3XrRDVnglCP+vBwvfd1rP5gHttsGDHyXwbf10a8/kKY
-> ssh-ed25519 HY2yRg QKZbubM76C3tobPoyCFDRclA9Pzb2fC7s4WOoIgdORc
K5kckU0KhQFTE6SikJXFJgM41Tco5+VqOsaG0qLrY1Q
-> ssh-ed25519 fw2Xhg +ohqts8dLFjvdHxrGHcOGxU0dm+V3N//giljHkobpDM
jR/UzGrfS9lrJ/VeolKLxfzeJAf2fIB2pdIn/6ukqNk
-> ssh-ed25519 tcumPQ 3DPkDPIQQSVtXSLzIRETsIyXQ0k1o18Evn6vf+l/6R8
bLXF62OmJjnOT1vvgq3+AcOKKSG5NonrK5EqCVc0Mwo
-> ssh-ed25519 JJ1LWg 2Wefc7eLolMU5InEmCNTq21Mf71mI0a2N1HgDrlHvy4
qXFW9CQBnrzubZ0mzS0Io2WGRrwGBkmeYndBTcZn/fM
-> ssh-ed25519 cDBabA oiH36AoIt/fFFYgnoxtH7OoetP+2/wjtn8qo3RJDSHc
qKmkxy1aZGP4ZwC0iH7n7hiJ0+rFQYvjQb5O1a1Z0r4
-> ssh-ed25519 cK5kHw bX3RtO5StMejUYWAaA37fjHA5nO7Xs1vWDQk3yOjs2o
Egxmcf8FKAd+E5hMLmhV1yQsCo5rJyUazf1szOvpTAM
-> ssh-ed25519 CAWG4Q oKqqRDJH0w8lsoQBQk0w8PO+z5gFNmSaGBUSumvDp1I
m1zWp9MfViAmtpbJhqOHraIokDaPKb0DvvO4vAGCTWI
-> ssh-ed25519 xA739A G26kPOz6sbFATs+KAr7gbDvji13eA1smFusQAOJXMwA
Sppvz7A103kZoNxoGsd6eXeCvVh7mBE2MRwLFj9O1dY
-> ssh-ed25519 MSF3dg 55ekNcp+inbUd+GQ/VZ7BoBASaJ8YDqF74CVXy1PUxQ
aTHLLAbzQPWWld/OT3BKebc6FcmsqMTaWCPBGm1UHic
--- mVkAMnI9XQhS3fMiFuuXP/yLR9wEG9+Rr8pA4Uc0avY
<04>DU <20><>s<EFBFBD><73><EFBFBD><EFBFBD>j<EFBFBD><6A>M<EFBFBD><4D>$<24>[<5B>M<EFBFBD><4D><EFBFBD><03>[_<>K7s<37>ju<>v<EFBFBD>D<EFBFBD>4<EFBFBD>g<EFBFBD><67>܄3<>Gn<47><6E><EFBFBD> ɽ<>P<EFBFBD>7~rZs<><73>
-> ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU
J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs
-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE
4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk
-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4
Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs
-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg
LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo
-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0
ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8
-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0
T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E
-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc
MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok
-> ~8m;7f-grease
lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/
G/X4EA
--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA
<EFBFBD>N <09>$[H<><48>Q<EFBFBD><51><EFBFBD>
d<EFBFBD><EFBFBD><EFBFBD>'<27><><EFBFBD>7<EFBFBD><1F>Ͳ)<29><><EFBFBD><17>x9y<39><79><EFBFBD>E<04><><EFBFBD>M7^<5E>[<5B>M<EFBFBD>+<2B>&<26><><EFBFBD><0E>$8tM<74>в

View File

@@ -1,13 +1,9 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>% <09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG <0B><><EFBFBD>
-> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA
Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs
-> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8
7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw
-> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I
GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU
--- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8
1<12>Mw4<77><34> <09>:H<>@<40>/<2F>gLtM<74>,<2C>ƥ<>*<2A><>z<EFBFBD>NV5<56>m<EFBFBD><6D>N<EFBFBD>o<EFBFBD><6F>j1 $<24>T<EFBFBD>G_<47>E{<7B>%<25><><17><>H<EFBFBD><EFBFBD>A<EFBFBD>p<EFBFBD>

Binary file not shown.

View File

@@ -1,13 +1,9 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk
6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM
-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY
6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M
-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc
mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY
-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M
KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc
-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc
wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U
--- 1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>R<EFBFBD>@<40>/i<>I'<27><><EFBFBD>Nx<4E>r"<1D>`<1E>O<EFBFBD><4F><EFBFBD>y<><79>8<EFBFBD><38> \/<2F><>I<19><17>D<EFBFBD>`<60>ߓ<EFBFBD><DF93><EFBFBD><1E><04>uy<75><79><EFBFBD>:9Lt<4C><1D><><EFBFBD>؋<EFBFBD><D88B><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AU<41><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>`<60>;<3B>q8<71>GLU#<23>i<EFBFBD>y<EFBFBD><79>i<03>ڜ
-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ
jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY
-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU
8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg
-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA
SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8
--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18
<EFBFBD><EFBFBD><EFBFBD><01>‡%<25>]yi"<22><><EFBFBD>L<EFBFBD> <0B><>H`<60>a$<24><>)<29>9ve<76>.0<EFBFBD>m<EFBFBD>K<EFBFBD>v<EFBFBD><EFBFBD> <0B>u"|1c<31>-%<25><>"<22>WF<12><><EFBFBD>A<EFBFBD><41>h<EFBFBD>$<05><>j<e<><65>x<EFBFBD>Lx<4C><78>.?<3F><><EFBFBD>:L<><4C><EFBFBD><EFBFBD>,<2C>u<EFBFBD>|<7C><>F|<7C>i<EFBFBD><69><EFBFBD>

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More