11 Commits

Author SHA1 Message Date
e065cde376 Use NixOS attributes for the install section 2023-09-18 19:27:14 +02:00
3bb0b550aa Add a RequiredBy dependency for remote-fs.target 2023-09-18 19:05:58 +02:00
e4cbcab81c Use a systemd mount directly for the nix store
Allows the LazyUnmount option and avoids the stage1 hack with
/nix//store.
2023-09-18 18:53:40 +02:00
77b41a90e2 Patch nix instead of using an override unit 2023-09-18 18:06:51 +02:00
1fc6891dc6 Remove nix-daemon.socket dependency of /nix/store
The dependency causes a cycle as the nix store will be mounted after the
network is ready, which itself depends on the socket.target which
requires the nix-daemon.socket to be ready too.
2023-09-18 17:28:47 +02:00
8c11c7460a Delay the mount until the network is ready 2023-09-18 16:07:46 +02:00
e6014511f5 Wait for the NFS hut store to be mounted 2023-09-18 15:50:37 +02:00
320c58ce48 Prevent the overlay to be mounted in stage1 2023-09-18 13:57:41 +02:00
d145ee9b2c Mount the overlay in /nix/store 2023-09-18 13:02:32 +02:00
140178d58e Begin the nix store overlay
We need to disable the read-only bind mount, so we can directly bind
mount the overlay.
2023-09-18 11:22:24 +02:00
d48f3b989a Enable direnv integration 2023-09-17 22:27:51 +02:00
112 changed files with 860 additions and 2736 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,2 @@
*.swp *.swp
/result /result
/misc

View File

@@ -150,27 +150,3 @@ And update grub.
``` ```
# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v # nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v
``` ```
## Chain NixOS in same disk with other systems
To install NixOS on a partition along another system which controls the GRUB,
first disable the grub device, so the GRUB is not installed in the disk by
NixOS (only the /boot files will be generated):
```
boot.loader.grub.device = "nodev";
```
Then add the following entry to the old GRUB configuration:
```
menuentry 'NixOS' {
insmod chain
search --no-floppy --label nixos --set root
configfile /boot/grub/grub.cfg
}
```
The partition with NixOS must have the label "nixos" for it to be found. New
system configuration entries will be stored in the GRUB configuration managed
by NixOS, so there is no need to change the old GRUB settings.

56
flake.lock generated
View File

@@ -6,15 +6,14 @@
"home-manager": "home-manager", "home-manager": "home-manager",
"nixpkgs": [ "nixpkgs": [
"nixpkgs" "nixpkgs"
], ]
"systems": "systems"
}, },
"locked": { "locked": {
"lastModified": 1723293904, "lastModified": 1690228878,
"narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=", "narHash": "sha256-9Xe7JV0krp4RJC9W9W9WutZVlw6BlHTFMiUP/k48LQY=",
"owner": "ryantm", "owner": "ryantm",
"repo": "agenix", "repo": "agenix",
"rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41", "rev": "d8c973fd228949736dedf61b7f8cc1ece3236792",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -30,17 +29,17 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1732868163, "lastModified": 1694708510,
"narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=", "narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=",
"ref": "refs/heads/master", "ref": "refs/heads/master",
"rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f", "rev": "3a4062ac04be6263c64a481420d8e768c2521b80",
"revCount": 952, "revCount": 862,
"type": "git", "type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs" "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
}, },
"original": { "original": {
"type": "git", "type": "git",
"url": "https://git.sr.ht/~rodarima/bscpkgs" "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
} }
}, },
"darwin": { "darwin": {
@@ -51,11 +50,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1700795494, "lastModified": 1673295039,
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=", "narHash": "sha256-AsdYgE8/GPwcelGgrntlijMg4t3hLFJFCRF3tL5WVjA=",
"owner": "lnl7", "owner": "lnl7",
"repo": "nix-darwin", "repo": "nix-darwin",
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d", "rev": "87b9d090ad39b25b2400029c64825fc2a8868943",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -73,11 +72,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1703113217, "lastModified": 1682203081,
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=", "narHash": "sha256-kRL4ejWDhi0zph/FpebFYhzqlOBrk0Pl3dzGEKSAlEw=",
"owner": "nix-community", "owner": "nix-community",
"repo": "home-manager", "repo": "home-manager",
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1", "rev": "32d3e39c491e2f91152c84f8ad8b003420eab0a1",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -88,16 +87,16 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1736867362, "lastModified": 1693663421,
"narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=", "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc", "rev": "e56990880811a451abd32515698c712788be5720",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "NixOS", "owner": "NixOS",
"ref": "nixos-24.11", "ref": "nixos-unstable",
"repo": "nixpkgs", "repo": "nixpkgs",
"type": "github" "type": "github"
} }
@@ -108,21 +107,6 @@
"bscpkgs": "bscpkgs", "bscpkgs": "bscpkgs",
"nixpkgs": "nixpkgs" "nixpkgs": "nixpkgs"
} }
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
} }
}, },
"root": "root", "root": "root",

View File

@@ -1,9 +1,9 @@
{ {
inputs = { inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
agenix.url = "github:ryantm/agenix"; agenix.url = "github:ryantm/agenix";
agenix.inputs.nixpkgs.follows = "nixpkgs"; agenix.inputs.nixpkgs.follows = "nixpkgs";
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git";
bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
}; };
@@ -17,16 +17,13 @@ let
in in
{ {
nixosConfigurations = { nixosConfigurations = {
hut = mkConf "hut"; hut = mkConf "hut";
tent = mkConf "tent"; owl1 = mkConf "owl1";
owl1 = mkConf "owl1"; owl2 = mkConf "owl2";
owl2 = mkConf "owl2"; eudy = mkConf "eudy";
eudy = mkConf "eudy"; koro = mkConf "koro";
koro = mkConf "koro"; bay = mkConf "bay";
bay = mkConf "bay"; lake2 = mkConf "lake2";
lake2 = mkConf "lake2";
raccoon = mkConf "raccoon";
fox = mkConf "fox";
}; };
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {

View File

@@ -9,11 +9,9 @@ rec {
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
}; };
hostGroup = with hosts; rec { hostGroup = with hosts; rec {
untrusted = [ fox ];
compute = [ owl1 owl2 ]; compute = [ owl1 owl2 ];
playground = [ eudy koro ]; playground = [ eudy koro ];
storage = [ bay lake2 ]; storage = [ bay lake2 ];

View File

@@ -2,21 +2,21 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
../module/monitoring.nix ../common/monitoring.nix
]; ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d"; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d";
boot.kernel.sysctl = {
"kernel.yama.ptrace_scope" = lib.mkForce "1";
};
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
ceph ceph
]; ];
services.slurm = {
client.enable = lib.mkForce false;
};
networking = { networking = {
hostName = "bay"; hostName = "bay";
interfaces.eno1.ipv4.addresses = [ { interfaces.eno1.ipv4.addresses = [ {
@@ -27,16 +27,6 @@
address = "10.0.42.40"; address = "10.0.42.40";
prefixLength = 24; prefixLength = 24;
} ]; } ];
firewall = {
extraCommands = ''
# Accept all incoming TCP traffic from lake2
iptables -A nixos-fw -p tcp -s lake2 -j nixos-fw-accept
# Accept monitoring requests from hut
iptables -A nixos-fw -p tcp -s hut -m multiport --dport 9283,9002 -j nixos-fw-accept
# Accept all Ceph traffic from the local network
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
'';
};
}; };
services.ceph = { services.ceph = {

View File

@@ -1,20 +0,0 @@
{
# All machines should include this profile.
# Includes the basic configuration for an Intel server.
imports = [
./base/agenix.nix
./base/august-shutdown.nix
./base/boot.nix
./base/env.nix
./base/fs.nix
./base/hw.nix
./base/net.nix
./base/nix.nix
./base/ntp.nix
./base/rev.nix
./base/ssh.nix
./base/users.nix
./base/watchdog.nix
./base/zsh.nix
];
}

View File

@@ -1,14 +0,0 @@
{
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
# hardware from spurious electrical peaks on the yearly electrical cut for
# manteinance that starts on August 4th.
systemd.timers.august-shutdown = {
description = "Shutdown on August 2nd for maintenance";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = "*-08-02 11:00:00";
RandomizedDelaySec = "10min";
Unit = "systemd-poweroff.service";
};
};
}

View File

@@ -1,35 +0,0 @@
{ pkgs, config, ... }:
{
environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns
# From bsckgs overlay
osumb
];
programs.direnv.enable = true;
# Increase limits
security.pam.loginLimits = [
{
domain = "*";
type = "-";
item = "memlock";
value = "1048576"; # 1 GiB of mem locked
}
];
environment.variables = {
EDITOR = "vim";
VISUAL = "vim";
};
programs.bash.promptInit = ''
PS1="\h\\$ "
'';
time.timeZone = "Europe/Madrid";
i18n.defaultLocale = "en_DK.UTF-8";
}

View File

@@ -1,19 +0,0 @@
{ pkgs, ... }:
{
networking = {
enableIPv6 = false;
useDHCP = false;
firewall = {
enable = true;
allowedTCPPorts = [ 22 ];
};
hosts = {
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
"84.88.51.152" = [ "raccoon" ];
"84.88.51.142" = [ "raccoon-ipmi" ];
};
};
}

View File

@@ -1,58 +0,0 @@
{ pkgs, nixpkgs, bscpkgs, theFlake, ... }:
{
nixpkgs.overlays = [
bscpkgs.bscOverlay
(import ../../../pkgs/overlay.nix)
];
nix = {
nixPath = [
"nixpkgs=${nixpkgs}"
"jungle=${theFlake.outPath}"
];
registry = {
nixpkgs.flake = nixpkgs;
jungle.flake = theFlake;
};
settings = {
experimental-features = [ "nix-command" "flakes" ];
sandbox = "relaxed";
trusted-users = [ "@wheel" ];
flake-registry = pkgs.writeText "global-registry.json"
''{"flakes":[],"version":2}'';
keep-outputs = true;
};
gc = {
automatic = true;
dates = "weekly";
options = "--delete-older-than 30d";
};
};
# The nix-gc.service can begin its execution *before* /home is mounted,
# causing it to remove all gcroots considering them as stale, as it cannot
# access the symlink. To prevent this problem, we force the service to wait
# until /home is mounted as well as other remote FS like /ceph.
systemd.services.nix-gc = {
# Start remote-fs.target if not already being started and fail if it fails
# to start. It will also be stopped if the remote-fs.target fails after
# starting successfully.
bindsTo = [ "remote-fs.target" ];
# Wait until remote-fs.target fully starts before starting this one.
after = [ "remote-fs.target"];
# Ensure we can access a remote path inside /home
unitConfig.ConditionPathExists = "/home/Computational";
};
# This value determines the NixOS release from which the default
# settings for stateful data, like file locations and database versions
# on your system were taken. Its perfectly fine and recommended to leave
# this value at the release version of the first install of this system.
# Before changing this value read the documentation for this option
# (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
system.stateVersion = "22.11"; # Did you read the comment?
}

View File

@@ -2,7 +2,7 @@
{ {
# Use the GRUB 2 boot loader. # Use the GRUB 2 boot loader.
boot.loader.grub.enable = true; boot.loader.grub.enable = lib.mkForce true;
# Enable GRUB2 serial console # Enable GRUB2 serial console
boot.loader.grub.extraConfig = '' boot.loader.grub.extraConfig = ''
@@ -11,12 +11,14 @@
terminal_output --append serial terminal_output --append serial
''; '';
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS0,115200"
];
boot.kernel.sysctl = { boot.kernel.sysctl = {
"kernel.perf_event_paranoid" = lib.mkDefault "-1"; "kernel.perf_event_paranoid" = lib.mkDefault "-1";
# Allow ptracing (i.e. attach with GDB) any process of the same user, see:
# https://www.kernel.org/doc/Documentation/security/Yama.txt
"kernel.yama.ptrace_scope" = "0";
}; };
boot.kernelPackages = pkgs.linuxPackages_latest; boot.kernelPackages = pkgs.linuxPackages_latest;

View File

@@ -13,12 +13,16 @@
[ { device = "/dev/disk/by-label/swap"; } [ { device = "/dev/disk/by-label/swap"; }
]; ];
# Mount the home via NFS
fileSystems."/home" = {
device = "10.0.40.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
# Tracing # Tracing
fileSystems."/sys/kernel/tracing" = { fileSystems."/sys/kernel/tracing" = {
device = "none"; device = "none";
fsType = "tracefs"; fsType = "tracefs";
}; };
# Mount a tmpfs into /tmp
boot.tmp.useTmpfs = true;
} }

97
m/common/main.nix Normal file
View File

@@ -0,0 +1,97 @@
{ config, pkgs, nixpkgs, bscpkgs, agenix, theFlake, ... }:
{
imports = [
./agenix.nix
./boot.nix
./fs.nix
./hw.nix
./net.nix
./ntp.nix
./slurm.nix
./ssh.nix
./users.nix
./watchdog.nix
./rev.nix
./zsh.nix
];
nixpkgs.overlays = [
bscpkgs.bscOverlay
(import ../../pkgs/overlay.nix)
];
system.configurationRevision =
if theFlake ? rev
then theFlake.rev
else throw ("Refusing to build from a dirty Git tree!");
nix.nixPath = [
"nixpkgs=${nixpkgs}"
"jungle=${theFlake.outPath}"
];
nix.settings.flake-registry =
pkgs.writeText "global-registry.json" ''{"flakes":[],"version":2}'';
nix.registry.nixpkgs.flake = nixpkgs;
nix.registry.jungle.flake = theFlake;
environment.systemPackages = with pkgs; [
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns
# From bsckgs overlay
bsc.osumb
];
programs.direnv.enable = true;
systemd.services."serial-getty@ttyS0" = {
enable = true;
wantedBy = [ "getty.target" ];
serviceConfig.Restart = "always";
};
# Increase limits
security.pam.loginLimits = [
{
domain = "*";
type = "-";
item = "memlock";
value = "1048576"; # 1 GiB of mem locked
}
];
time.timeZone = "Europe/Madrid";
i18n.defaultLocale = "en_DK.UTF-8";
environment.variables = {
EDITOR = "vim";
VISUAL = "vim";
};
nix.settings.experimental-features = [ "nix-command" "flakes" ];
nix.settings.sandbox = "relaxed";
nix.settings.trusted-users = [ "@wheel" ];
nix.gc.automatic = true;
nix.gc.dates = "weekly";
nix.gc.options = "--delete-older-than 30d";
programs.bash.promptInit = ''
PS1="\h\\$ "
'';
# Copy the NixOS configuration file and link it from the resulting system
# (/run/current-system/configuration.nix). This is useful in case you
# accidentally delete configuration.nix.
#system.copySystemConfiguration = true;
# This value determines the NixOS release from which the default
# settings for stateful data, like file locations and database versions
# on your system were taken. Its perfectly fine and recommended to leave
# this value at the release version of the first install of this system.
# Before changing this value read the documentation for this option
# (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
system.stateVersion = "22.11"; # Did you read the comment?
}

View File

@@ -6,21 +6,24 @@
boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
networking = { networking = {
enableIPv6 = false;
useDHCP = false;
defaultGateway = "10.0.40.30"; defaultGateway = "10.0.40.30";
nameservers = ["8.8.8.8"]; nameservers = ["8.8.8.8"];
proxy = { proxy = {
default = "http://hut:23080/"; default = "http://localhost:23080/";
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut"; noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
# Don't set all_proxy as go complains and breaks the gitlab runner, see: # Don't set all_proxy as go complains and breaks the gitlab runner, see:
# https://github.com/golang/go/issues/16715 # https://github.com/golang/go/issues/16715
allProxy = null; allProxy = null;
}; };
firewall = { firewall = {
enable = true;
allowedTCPPorts = [ 22 ];
extraCommands = '' extraCommands = ''
# Prevent ssfhead from contacting our slurmd daemon # Prevent ssfhead from contacting our slurmd daemon
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse
# But accept traffic to slurm ports from any other node in the subnet # But accept traffic to slurm ports from any other node in the subnet
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
# We also need to open the srun port range # We also need to open the srun port range
@@ -29,42 +32,43 @@
}; };
extraHosts = '' extraHosts = ''
10.0.40.30 ssfhead 10.0.40.30 ssfhead
84.88.53.236 ssfhead.bsc.es ssfhead
# Node Entry for node: mds01 (ID=72) # Node Entry for node: mds01 (ID=72)
10.0.40.40 bay mds01 mds01-eth0 10.0.40.40 bay mds01 mds01-eth0
10.0.42.40 bay-ib mds01-ib0 10.0.42.40 bay-ib mds01-ib0
10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi 10.0.40.141 bay-ipmi mds01-ipmi0
# Node Entry for node: oss01 (ID=73) # Node Entry for node: oss01 (ID=73)
10.0.40.41 oss01 oss01-eth0 10.0.40.41 oss01 oss01-eth0
10.0.42.41 oss01-ib0 10.0.42.41 oss01-ib0
10.0.40.142 oss01-ipmi0 oss01-ipmi 10.0.40.142 oss01-ipmi0
# Node Entry for node: oss02 (ID=74) # Node Entry for node: oss02 (ID=74)
10.0.40.42 lake2 oss02 oss02-eth0 10.0.40.42 lake2 oss02 oss02-eth0
10.0.42.42 lake2-ib oss02-ib0 10.0.42.42 lake2-ib oss02-ib0
10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi 10.0.40.143 lake2-ipmi oss02-ipmi0
# Node Entry for node: xeon01 (ID=15) # Node Entry for node: xeon01 (ID=15)
10.0.40.1 owl1 xeon01 xeon01-eth0 10.0.40.1 owl1 xeon01 xeon01-eth0
10.0.42.1 owl1-ib xeon01-ib0 10.0.42.1 owl1-ib xeon01-ib0
10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi 10.0.40.101 owl1-ipmi xeon01-ipmi0
# Node Entry for node: xeon02 (ID=16) # Node Entry for node: xeon02 (ID=16)
10.0.40.2 owl2 xeon02 xeon02-eth0 10.0.40.2 owl2 xeon02 xeon02-eth0
10.0.42.2 owl2-ib xeon02-ib0 10.0.42.2 owl2-ib xeon02-ib0
10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi 10.0.40.102 owl2-ipmi xeon02-ipmi0
# Node Entry for node: xeon03 (ID=17) # Node Entry for node: xeon03 (ID=17)
10.0.40.3 xeon03 xeon03-eth0 10.0.40.3 xeon03 xeon03-eth0
10.0.42.3 xeon03-ib0 10.0.42.3 xeon03-ib0
10.0.40.103 xeon03-ipmi0 xeon03-ipmi 10.0.40.103 xeon03-ipmi0
# Node Entry for node: xeon04 (ID=18) # Node Entry for node: xeon04 (ID=18)
10.0.40.4 xeon04 xeon04-eth0 10.0.40.4 xeon04 xeon04-eth0
10.0.42.4 xeon04-ib0 10.0.42.4 xeon04-ib0
10.0.40.104 xeon04-ipmi0 xeon04-ipmi 10.0.40.104 xeon04-ipmi0
# Node Entry for node: xeon05 (ID=19) # Node Entry for node: xeon05 (ID=19)
10.0.40.5 koro xeon05 xeon05-eth0 10.0.40.5 koro xeon05 xeon05-eth0
@@ -74,17 +78,17 @@
# Node Entry for node: xeon06 (ID=20) # Node Entry for node: xeon06 (ID=20)
10.0.40.6 xeon06 xeon06-eth0 10.0.40.6 xeon06 xeon06-eth0
10.0.42.6 xeon06-ib0 10.0.42.6 xeon06-ib0
10.0.40.106 xeon06-ipmi0 xeon06-ipmi 10.0.40.106 xeon06-ipmi0
# Node Entry for node: xeon07 (ID=21) # Node Entry for node: xeon07 (ID=21)
10.0.40.7 hut xeon07 xeon07-eth0 10.0.40.7 hut xeon07 xeon07-eth0
10.0.42.7 hut-ib xeon07-ib0 10.0.42.7 hut-ib xeon07-ib0
10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi 10.0.40.107 hut-ipmi xeon07-ipmi0
# Node Entry for node: xeon08 (ID=22) # Node Entry for node: xeon08 (ID=22)
10.0.40.8 eudy xeon08 xeon08-eth0 10.0.40.8 eudy xeon08 xeon08-eth0
10.0.42.8 eudy-ib xeon08-ib0 10.0.42.8 eudy-ib xeon08-ib0
10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi 10.0.40.108 eudy-ipmi xeon08-ipmi0
''; '';
}; };
} }

View File

@@ -1,7 +1,6 @@
{ theFlake, ... }: { theFlake, ... }:
let let
# Prevent building a configuration without revision
rev = if theFlake ? rev then theFlake.rev rev = if theFlake ? rev then theFlake.rev
else throw ("Refusing to build from a dirty Git tree!"); else throw ("Refusing to build from a dirty Git tree!");
in { in {
@@ -16,6 +15,4 @@ in {
DATENOW=$(date --iso-8601=seconds) DATENOW=$(date --iso-8601=seconds)
echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log
''; '';
system.configurationRevision = rev;
} }

View File

@@ -47,7 +47,8 @@ in {
]; ];
partitionName = [ partitionName = [
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP"
]; ];
# See slurm.conf(5) for more details about these options. # See slurm.conf(5) for more details about these options.
@@ -82,37 +83,9 @@ in {
# Reduce port range so we can allow only this range in the firewall # Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000 SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_Core
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
# This sets up the "extern" step into which ssh-launched processes will be
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
# when a task runs (srun) so we can ssh early.
PrologFlags=Alloc,Contain,X11
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
# adopted by the external step, similar to tasks running in regular steps
# LaunchParameters=ulimit_pam_adopt
SlurmdDebug=debug5
#DebugFlags=Protocol,Cgroup
'';
extraCgroupConfig = ''
CgroupPlugin=cgroup/v2
#ConstrainCores=yes
''; '';
}; };
# Place the slurm config in /etc as this will be required by PAM
environment.etc.slurm.source = config.services.slurm.etcSlurm;
age.secrets.mungeKey = { age.secrets.mungeKey = {
file = ../../secrets/munge-key.age; file = ../../secrets/munge-key.age;
owner = "munge"; owner = "munge";

View File

@@ -1,9 +0,0 @@
{
# Provides the base system for a xeon node in the SSF rack.
imports = [
./xeon.nix
./ssf/fs.nix
./ssf/net.nix
./ssf/ssh.nix
];
}

View File

@@ -1,8 +0,0 @@
{
# Mount the home via NFS
fileSystems."/home" = {
device = "10.0.40.30:/home";
fsType = "nfs";
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
}

View File

@@ -1,8 +0,0 @@
{
# Connect to intranet git hosts via proxy
programs.ssh.extraConfig = ''
# Connect to BSC machines via hut proxy too
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p
'';
}

View File

@@ -1,18 +1,22 @@
{ lib, ... }: { lib, ... }:
let let
keys = import ../../../keys.nix; keys = import ../../keys.nix;
hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts; hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts;
in in
{ {
# Enable the OpenSSH daemon. # Enable the OpenSSH daemon.
services.openssh.enable = true; services.openssh.enable = true;
# Connect to intranet git hosts via proxy
programs.ssh.extraConfig = ''
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
User git
ProxyCommand nc -X connect -x localhost:23080 %h %p
'';
programs.ssh.knownHosts = hostsKeys // { programs.ssh.knownHosts = hostsKeys // {
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
}; };
} }

View File

@@ -1,10 +1,6 @@
{ pkgs, ... }: { pkgs, ... }:
{ {
imports = [
../../module/jungle-users.nix
];
users = { users = {
mutableUsers = false; mutableUsers = false;
users = { users = {
@@ -20,7 +16,6 @@
rarias = { rarias = {
uid = 1880; uid = 1880;
isNormalUser = true; isNormalUser = true;
linger = true;
home = "/home/Computational/rarias"; home = "/home/Computational/rarias";
description = "Rodrigo Arias"; description = "Rodrigo Arias";
group = "Computational"; group = "Computational";
@@ -40,23 +35,20 @@
home = "/home/Computational/arocanon"; home = "/home/Computational/arocanon";
description = "Aleix Roca"; description = "Aleix Roca";
group = "Computational"; group = "Computational";
extraGroups = [ "wheel" "tracing" ]; extraGroups = [ "wheel" ];
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/"; hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland"
]; ];
}; };
};
jungleUsers = {
rpenacob = { rpenacob = {
uid = 2761; uid = 2761;
isNormalUser = true; isNormalUser = true;
home = "/home/Computational/rpenacob"; home = "/home/Computational/rpenacob";
description = "Raúl Peñacoba"; description = "Raúl Peñacoba";
group = "Computational"; group = "Computational";
hosts = [ "owl1" "owl2" "hut" "tent" ];
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/"; hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
@@ -69,82 +61,15 @@
home = "/home/Computational/anavarro"; home = "/home/Computational/anavarro";
description = "Antoni Navarro"; description = "Antoni Navarro";
group = "Computational"; group = "Computational";
hosts = [ "hut" "tent" "raccoon" "fox" ];
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
openssh.authorizedKeys.keys = [ openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
]; ];
}; };
abonerib = {
uid = 4541;
isNormalUser = true;
home = "/home/Computational/abonerib";
description = "Aleix Boné";
group = "Computational";
hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" "fox" ];
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
];
};
vlopez = {
uid = 4334;
isNormalUser = true;
home = "/home/Computational/vlopez";
description = "Victor López";
group = "Computational";
hosts = [ "koro" ];
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
];
};
dbautist = {
uid = 5649;
isNormalUser = true;
home = "/home/Computational/dbautist";
description = "Dylan Bautista Cases";
group = "Computational";
hosts = [ "hut" "tent" "raccoon" ];
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
];
};
dalvare1 = {
uid = 2758;
isNormalUser = true;
home = "/home/Computational/dalvare1";
description = "David Álvarez";
group = "Computational";
hosts = [ "hut" "tent" "fox" ];
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
];
};
varcila = {
uid = 5650;
isNormalUser = true;
home = "/home/Computational/varcila";
description = "Vincent Arcila";
group = "Computational";
hosts = [ "hut" "tent" "fox" ];
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
];
};
}; };
groups = { groups = {
Computational = { gid = 564; }; Computational = { gid = 564; };
tracing = { };
}; };
}; };
} }

View File

@@ -1,7 +0,0 @@
{
# Provides the base system for a xeon node, not necessarily in the SSF rack.
imports = [
./base.nix
./xeon/console.nix
];
}

View File

@@ -1,14 +0,0 @@
{
# Restart the serial console
systemd.services."serial-getty@ttyS0" = {
enable = true;
wantedBy = [ "getty.target" ];
serviceConfig.Restart = "always";
};
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS0,115200"
];
}

View File

@@ -2,14 +2,14 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix") #(modulesPath + "/installer/netboot/netboot-minimal.nix")
./kernel/kernel.nix ./kernel/kernel.nix
./cpufreq.nix ./cpufreq.nix
./fs.nix ./fs.nix
./users.nix ./users.nix
../module/debuginfod.nix ./slurm.nix
]; ];
# Select this using the ID to avoid mismatches # Select this using the ID to avoid mismatches

View File

@@ -21,9 +21,9 @@ let
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig; # configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
#}; #};
kernel = nixos-fcs; kernel = nixos-fcsv3;
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec { nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
version = "6.2.8"; version = "6.2.8";
src = builtins.fetchGit { src = builtins.fetchGit {
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
@@ -40,13 +40,35 @@ let
}; };
kernelPatches = []; kernelPatches = [];
extraMeta.branch = lib.versions.majorMinor version; extraMeta.branch = lib.versions.majorMinor version;
})); });
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";};
nixos-fcs-lockstat = nixos-fcs.override { nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";};
nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";};
# always use fcs_sched_setaffinity
#nixos-debug = nixos-fcs-kernel {gitCommit = "7d0bf285fca92badc8df3c9907a9ab30db4418aa";};
# remove need_check_cgroup
#nixos-debug = nixos-fcs-kernel {gitCommit = "4cc4efaab5e4a0bfa3089e935215b981c1922919";};
# merge again fcs_wake and fcs_wait
#nixos-debug = nixos-fcs-kernel {gitCommit = "40c6f72f4ae54b0b636b193ac0648fb5730c810d";};
# start from scratch, this is the working version with split fcs_wake and fcs_wait
nixos-debug = nixos-fcs-kernel {gitCommit = "c9a39d6a4ca83845b4e71fcc268fb0a76aff1bdf"; branch = "fcs-test"; };
nixos-fcsv1-lockstat = nixos-fcs-kernel {
gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";
lockStat = true; lockStat = true;
}; };
nixos-fcs-lockstat-preempt = nixos-fcs.override { nixos-fcsv2-lockstat = nixos-fcs-kernel {
gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";
lockStat = true;
};
nixos-fcsv3-lockstat = nixos-fcs-kernel {
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
lockStat = true;
};
nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel {
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
lockStat = true; lockStat = true;
preempt = true; preempt = true;
}; };

7
m/eudy/slurm.nix Normal file
View File

@@ -0,0 +1,7 @@
{ lib, ... }:
{
services.slurm = {
client.enable = lib.mkForce false;
};
}

View File

@@ -1,76 +0,0 @@
{ lib, config, pkgs, ... }:
{
imports = [
../common/base.nix
../common/xeon/console.nix
../module/emulation.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
boot.kernelModules = [ "kvm-amd" ];
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
networking = {
defaultGateway = "147.83.30.130";
nameservers = [ "8.8.8.8" ];
hostName = "fox";
interfaces.enp1s0f0np0.ipv4.addresses = [
{
# UPC network
# Public IP configuration:
# - Hostname: fox.ac.upc.edu
# - IP: 147.83.30.141
# - Gateway: 147.83.30.130
# - NetMask: 255.255.255.192
# Private IP configuration for BMC:
# - Hostname: fox-ipmi.ac.upc.edu
# - IP: 147.83.35.27
# - Gateway: 147.83.35.2
# - NetMask: 255.255.255.0
address = "147.83.30.141";
prefixLength = 26; # 255.255.255.192
}
];
extraHosts = ''
147.83.30.141 fox.ac.upc.edu
147.83.35.27 fox-ipmi.ac.upc.edu
'';
};
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.graphics.enable = true;
nixpkgs.config.allowUnfree = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# Mount NVME disks
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
# Make a /nvme{0,1}/$USER directory for each user.
systemd.services.create-nvme-dirs = let
# Take only normal users in fox
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
]) users);
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
}

View File

@@ -1,162 +0,0 @@
modules:
http_2xx:
prober: http
timeout: 5s
http:
proxy_url: "http://127.0.0.1:23080"
skip_resolve_phase_with_proxy: true
follow_redirects: true
valid_status_codes: [] # Defaults to 2xx
method: GET
http_with_proxy:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
skip_resolve_phase_with_proxy: true
http_with_proxy_and_headers:
prober: http
http:
proxy_url: "http://127.0.0.1:3128"
proxy_connect_header:
Proxy-Authorization:
- Bearer token
http_post_2xx:
prober: http
timeout: 5s
http:
method: POST
headers:
Content-Type: application/json
body: '{}'
http_post_body_file:
prober: http
timeout: 5s
http:
method: POST
body_file: "/files/body.txt"
http_basic_auth_example:
prober: http
timeout: 5s
http:
method: POST
headers:
Host: "login.example.com"
basic_auth:
username: "username"
password: "mysecret"
http_2xx_oauth_client_credentials:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
follow_redirects: true
preferred_ip_protocol: "ip4"
valid_status_codes:
- 200
- 201
oauth2:
client_id: "client_id"
client_secret: "client_secret"
token_url: "https://api.example.com/token"
endpoint_params:
grant_type: "client_credentials"
http_custom_ca_example:
prober: http
http:
method: GET
tls_config:
ca_file: "/certs/my_cert.crt"
http_gzip:
prober: http
http:
method: GET
compression: gzip
http_gzip_with_accept_encoding:
prober: http
http:
method: GET
compression: gzip
headers:
Accept-Encoding: gzip
tls_connect:
prober: tcp
timeout: 5s
tcp:
tls: true
tcp_connect_example:
prober: tcp
timeout: 5s
imap_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "OK.*STARTTLS"
- send: ". STARTTLS"
- expect: "OK"
- starttls: true
- send: ". capability"
- expect: "CAPABILITY IMAP4rev1"
smtp_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "^220 ([^ ]+) ESMTP (.+)$"
- send: "EHLO prober\r"
- expect: "^250-STARTTLS"
- send: "STARTTLS\r"
- expect: "^220"
- starttls: true
- send: "EHLO prober\r"
- expect: "^250-AUTH"
- send: "QUIT\r"
irc_banner_example:
prober: tcp
timeout: 5s
tcp:
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: "ip4"
dns_udp_example:
prober: dns
timeout: 5s
dns:
query_name: "www.prometheus.io"
query_type: "A"
valid_rcodes:
- NOERROR
validate_answer_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
fail_if_all_match_regexp:
- ".*127.0.0.1"
fail_if_not_matches_regexp:
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
fail_if_none_matches_regexp:
- "127.0.0.1"
validate_authority_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
validate_additional_rrs:
fail_if_matches_regexp:
- ".*127.0.0.1"
dns_soa:
prober: dns
dns:
query_name: "prometheus.io"
query_type: "SOA"
dns_tcp_example:
prober: dns
dns:
transport_protocol: "tcp" # defaults to "udp"
preferred_ip_protocol: "ip4" # defaults to "ip6"
query_name: "www.prometheus.io"

View File

@@ -1,44 +1,22 @@
{ config, pkgs, lib, ... }: { config, pkgs, ... }:
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
../module/ceph.nix ../module/ceph.nix
../module/debuginfod.nix
../module/emulation.nix
../module/slurm-client.nix
./gitlab-runner.nix ./gitlab-runner.nix
./monitoring.nix ./monitoring.nix
./nfs.nix ./nfs.nix
./slurm-server.nix ./slurm-daemon.nix
./nix-serve.nix ./nix-serve.nix
./public-inbox.nix
./gitea.nix
./msmtp.nix
./postgresql.nix
./nginx.nix
./p.nix
#./pxe.nix #./pxe.nix
]; ];
boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f"; boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
fileSystems = {
"/" = lib.mkForce {
device = "/dev/disk/by-label/nvme";
fsType = "ext4";
neededForBoot = true;
options = [ "noatime" ];
};
"/boot" = lib.mkForce {
device = "/dev/disk/by-label/nixos-boot";
fsType = "ext4";
neededForBoot = true;
};
};
networking = { networking = {
hostName = "hut"; hostName = "hut";
@@ -50,20 +28,5 @@
address = "10.0.42.7"; address = "10.0.42.7";
prefixLength = 24; prefixLength = 24;
} ]; } ];
firewall = {
extraCommands = ''
# Accept all proxy traffic from compute nodes but not the login
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
'';
# Flush all rules and chains on stop so it won't break on start
extraStopCommands = ''
iptables -F
iptables -X
'';
};
}; };
# Allow proxy to bind to the ethernet interface
services.openssh.settings.GatewayPorts = "clientspecified";
} }

View File

@@ -1,63 +0,0 @@
{ config, lib, ... }:
{
age.secrets.giteaRunnerToken.file = ../../secrets/gitea-runner-token.age;
services.gitea = {
enable = true;
appName = "Gitea in the jungle";
settings = {
server = {
ROOT_URL = "https://jungle.bsc.es/git/";
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
LANDING_PAGE = "explore";
};
metrics.ENABLED = true;
service = {
REGISTER_MANUAL_CONFIRM = true;
ENABLE_NOTIFY_MAIL = true;
};
log.LEVEL = "Warn";
mailer = {
ENABLED = true;
FROM = "jungle-robot@bsc.es";
PROTOCOL = "sendmail";
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
SENDMAIL_ARGS = "--";
};
};
};
services.gitea-actions-runner.instances = {
runrun = {
enable = true;
name = "runrun";
url = "https://jungle.bsc.es/git/";
tokenFile = config.age.secrets.giteaRunnerToken.path;
labels = [ "native:host" ];
settings.runner.capacity = 8;
};
};
systemd.services.gitea-runner-runrun = {
path = [ "/run/current-system/sw" ];
serviceConfig = {
# DynamicUser doesn't work well with SSH
DynamicUser = lib.mkForce false;
User = "gitea-runner";
Group = "gitea-runner";
};
};
users.users.gitea-runner = {
isSystemUser = true;
home = "/var/lib/gitea-runner";
description = "Gitea Runner";
group = "gitea-runner";
extraGroups = [ "docker" ];
createHome = true;
};
users.groups.gitea-runner = {};
}

View File

@@ -1,111 +1,54 @@
{ pkgs, lib, config, ... }: { pkgs, lib, config, ... }:
{ {
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age; age.secrets.ovniToken.file = ../../secrets/ovni-token.age;
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age; age.secrets.nosvToken.file = ../../secrets/nosv-token.age;
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
services.gitlab-runner = { services.gitlab-runner = {
enable = true; enable = true;
settings.concurrent = 5; settings.concurrent = 5;
services = let services = {
common-shell = { ovni-shell = {
registrationConfigFile = config.age.secrets.ovniToken.path;
executor = "shell"; executor = "shell";
tagList = [ "nix" "xeon" ];
registrationFlags = [
# Using space doesn't work, and causes it to misread the next flag
"--locked='false'"
];
environmentVariables = { environmentVariables = {
SHELL = "${pkgs.bash}/bin/bash"; SHELL = "${pkgs.bash}/bin/bash";
}; };
}; };
common-docker = { ovni-docker = {
executor = "docker"; registrationConfigFile = config.age.secrets.ovniToken.path;
dockerImage = "debian:stable"; dockerImage = "debian:stable";
tagList = [ "docker" "xeon" ];
registrationFlags = [ registrationFlags = [
"--locked='false'"
"--docker-network-mode host" "--docker-network-mode host"
]; ];
environmentVariables = { environmentVariables = {
https_proxy = "http://hut:23080"; https_proxy = "http://localhost:23080";
http_proxy = "http://hut:23080"; http_proxy = "http://localhost:23080";
}; };
}; };
in { nosv-docker = {
# For pm.bsc.es/gitlab registrationConfigFile = config.age.secrets.nosvToken.path;
gitlab-pm-shell = common-shell // { dockerImage = "debian:stable";
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path; tagList = [ "docker" "xeon" ];
};
gitlab-pm-docker = common-docker // {
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
};
gitlab-bsc-docker = {
# gitlab.bsc.es still uses the old token mechanism
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
tagList = [ "docker" "hut" ];
environmentVariables = {
# We cannot access the hut local interface from docker, so we connect
# to hut directly via the ethernet one.
https_proxy = "http://hut:23080";
http_proxy = "http://hut:23080";
};
executor = "docker";
dockerImage = "alpine";
dockerVolumes = [
"/nix/store:/nix/store:ro"
"/nix/var/nix/db:/nix/var/nix/db:ro"
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
];
dockerExtraHosts = [
# Required to pass the proxy via hut
"hut:10.0.40.7"
];
dockerDisableCache = true;
registrationFlags = [ registrationFlags = [
# Increase build log length to 64 MiB "--docker-network-mode host"
"--output-limit 65536" "--docker-cpus 56"
]; ];
preBuildScript = pkgs.writeScript "setup-container" ''
mkdir -p -m 0755 /nix/var/log/nix/drvs
mkdir -p -m 0755 /nix/var/nix/gcroots
mkdir -p -m 0755 /nix/var/nix/profiles
mkdir -p -m 0755 /nix/var/nix/temproots
mkdir -p -m 0755 /nix/var/nix/userpool
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
mkdir -p -m 0700 "$HOME/.nix-defexpr"
mkdir -p -m 0700 "$HOME/.ssh"
cat > "$HOME/.ssh/config" << EOF
Host bscpm04.bsc.es gitlab-internal.bsc.es
User git
ProxyCommand nc -X connect -x hut:23080 %h %p
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
ProxyCommand nc -X connect -x hut:23080 %h %p
EOF
cat >> "$HOME/.ssh/known_hosts" << EOF
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
EOF
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
# Required to load SSL certificate paths
. ${pkgs.cacert}/nix-support/setup-hook
'';
environmentVariables = { environmentVariables = {
ENV = "/etc/profile"; https_proxy = "http://localhost:23080";
USER = "root"; http_proxy = "http://localhost:23080";
NIX_REMOTE = "daemon";
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
}; };
}; };
}; };
}; };
# DOCKER* chains are useless, override at FORWARD and nixos-fw
networking.firewall.extraCommands = ''
# Don't forward any traffic from docker
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
# Allow incoming traffic from docker to 23080
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
'';
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner"; systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";

View File

@@ -1,31 +0,0 @@
{ pkgs, config, lib, ... }:
let
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
''
cp ${./gpfs-probe.sh} $out;
chmod +x $out
''
;
in
{
# Use a new user to handle the SSH keys
users.groups.ssh-robot = { };
users.users.ssh-robot = {
description = "SSH Robot";
isNormalUser = true;
home = "/var/lib/ssh-robot";
};
systemd.services.gpfs-probe = {
description = "Daemon to report GPFS latency via SSH";
path = [ pkgs.openssh pkgs.netcat ];
after = [ "network.target" ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
User = "ssh-robot";
Group = "ssh-robot";
};
};
}

View File

@@ -1,18 +0,0 @@
#!/bin/sh
N=500
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
if [ -z "$t" ]; then
t="5.00"
fi
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP gpfs_touch_latency Time to create $N files.
# TYPE gpfs_touch_latency gauge
gpfs_touch_latency $t
EOF

13
m/hut/ipmi.yml Normal file
View File

@@ -0,0 +1,13 @@
modules:
default:
collectors:
- bmc
- ipmi
- chassis
lan:
collectors:
- ipmi
- chassis
user: ""
pass: ""

View File

@@ -1,22 +1,6 @@
{ config, lib, ... }: { config, lib, ... }:
{ {
imports = [
../module/slurm-exporter.nix
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
./gpfs-probe.nix
./nix-daemon-exporter.nix
];
age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
owner = "grafana";
mode = "400";
};
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
services.grafana = { services.grafana = {
enable = true; enable = true;
settings = { settings = {
@@ -27,29 +11,14 @@
http_port = 2342; http_port = 2342;
http_addr = "127.0.0.1"; http_addr = "127.0.0.1";
}; };
smtp = {
enabled = true;
from_address = "jungle-robot@bsc.es";
user = "jungle-robot";
# Read the password from a file, which is only readable by grafana user
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
host = "mail.bsc.es:465";
startTLS_policy = "NoStartTLS";
};
feature_toggles.publicDashboards = true; feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
}; };
}; };
# Make grafana alerts also use the proxy
systemd.services.grafana.environment = config.networking.proxy.envVars;
services.prometheus = { services.prometheus = {
enable = true; enable = true;
port = 9001; port = 9001;
retentionTime = "5y"; retentionTime = "1y";
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
@@ -78,13 +47,13 @@
enable = true; enable = true;
group = "root"; group = "root";
user = "root"; user = "root";
configFile = config.age.secrets.ipmiYml.path; configFile = ./ipmi.yml;
# extraFlags = [ "--log.level=debug" ]; #extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
node = { node = {
enable = true; enable = true;
enabledCollectors = [ "systemd" "logind" ]; enabledCollectors = [ "systemd" ];
port = 9002; port = 9002;
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
@@ -92,11 +61,6 @@
enable = true; enable = true;
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
}; };
blackbox = {
enable = true;
listenAddress = "127.0.0.1";
configFile = ./blackbox.yml;
};
}; };
scrapeConfigs = [ scrapeConfigs = [
@@ -109,12 +73,6 @@
"127.0.0.1:9323" "127.0.0.1:9323"
"127.0.0.1:9252" "127.0.0.1:9252"
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
"127.0.0.1:9341" # Slurm exporter
"127.0.0.1:9966" # GPFS custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
]; ];
}]; }];
} }
@@ -128,74 +86,6 @@
]; ];
}]; }];
} }
{
job_name = "blackbox-http";
metrics_path = "/probe";
params = { module = [ "http_2xx" ]; };
static_configs = [{
targets = [
"https://www.google.com/robots.txt"
"https://pm.bsc.es/"
"https://pm.bsc.es/gitlab/"
"https://jungle.bsc.es/"
"https://gitlab.bsc.es/"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
}
];
}
{
job_name = "blackbox-icmp";
metrics_path = "/probe";
params = { module = [ "icmp" ]; };
static_configs = [{
targets = [
"1.1.1.1"
"8.8.8.8"
"ssfhead"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"arenys5.ac.upc.edu"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
}
];
}
{
job_name = "gitea";
static_configs = [{ targets = [ "127.0.0.1:3000" ]; }];
}
{ {
# Scrape the IPMI info of the hosts remotely via LAN # Scrape the IPMI info of the hosts remotely via LAN
job_name = "ipmi-lan"; job_name = "ipmi-lan";
@@ -217,7 +107,7 @@
# Sets the "instance" label with the remote host we are querying # Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ]; source_labels = [ "__param_target" ];
separator = ";"; separator = ";";
regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end regex = "(.*)";
target_label = "instance"; target_label = "instance";
replacement = "\${1}"; replacement = "\${1}";
action = "replace"; action = "replace";
@@ -248,25 +138,6 @@
} }
]; ];
} }
{
job_name = "ipmi-raccoon";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9291" ]; }
];
params = {
target = [ "84.88.51.142" ];
module = [ "raccoon" ];
};
}
{
job_name = "raccoon";
static_configs = [
{
targets = [ "127.0.0.1:19002" ]; # Node exporter
}
];
}
]; ];
}; };
} }

View File

@@ -1,24 +0,0 @@
{ config, lib, ... }:
{
age.secrets.jungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
group = "gitea";
mode = "440";
};
programs.msmtp = {
enable = true;
accounts = {
default = {
auth = true;
tls = true;
tls_starttls = false;
port = 465;
host = "mail.bsc.es";
user = "jungle-robot";
passwordeval = "cat ${config.age.secrets.jungleRobotPassword.path}";
from = "jungle-robot@bsc.es";
};
};
};
}

View File

@@ -1,73 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = theFlake;
buildInputs = [ pkgs.hugo ];
buildPhase = ''
cd web
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;
location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /ceph/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /ceph/p/;
}
'';
};
};
}

View File

@@ -1,26 +0,0 @@
#!/bin/sh
# Locate nix daemon pid
nd=$(pgrep -o nix-daemon)
# Locate children of nix-daemon
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
# For each children, locate 2nd level children
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
# HELP nix_daemon_build Nix daemon derivation build state.
# TYPE nix_daemon_build gauge
EOF
for pid in $pids2; do
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
user=$(ps -o uname= -p "$pid")
if [ -n "$name" -a -n "$user" ]; then
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
fi
done

View File

@@ -1,23 +0,0 @@
{ pkgs, config, lib, ... }:
let
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
''
cp ${./nix-daemon-builds.sh} $out;
chmod +x $out
''
;
in
{
systemd.services.nix-daemon-exporter = {
description = "Daemon to export nix-daemon metrics";
path = [ pkgs.procps pkgs.ripgrep ];
wantedBy = [ "default.target" ];
serviceConfig = {
Type = "simple";
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
# Needed root to read the environment, potentially unsafe
User = "root";
Group = "root";
};
};
}

View File

@@ -1,43 +0,0 @@
{ pkgs, lib, config, ... }:
let
p = pkgs.writeShellScriptBin "p" ''
set -e
cd /ceph
pastedir="p/$USER"
mkdir -p "$pastedir"
ext="txt"
if [ -n "$1" ]; then
ext="$1"
fi
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
cat > "$out"
chmod go+r "$out"
echo "https://jungle.bsc.es/$out"
'';
in
{
environment.systemPackages = with pkgs; [ p ];
# Make sure we have a directory per user. We cannot use the nice
# systemd-tmpfiles-setup.service service because this is a remote FS, and it
# may not be mounted when it runs.
systemd.services.create-paste-dirs = let
# Take only normal users in hut
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
]) users);
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "remote-fs.target" ];
after = [ "remote-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
}

View File

@@ -1,19 +0,0 @@
{ lib, ... }:
{
services.postgresql = {
enable = true;
ensureDatabases = [ "perftestsdb" ];
ensureUsers = [
{ name = "anavarro"; ensureClauses.superuser = true; }
{ name = "rarias"; ensureClauses.superuser = true; }
{ name = "grafana"; }
];
authentication = ''
#type database DBuser auth-method
local perftestsdb rarias trust
local perftestsdb anavarro trust
local perftestsdb grafana trust
'';
};
}

View File

@@ -1,79 +0,0 @@
/*
* CC0-1.0 <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
* Dark color scheme using 216 web-safe colors, inspired
* somewhat by the default color scheme in mutt.
* It reduces eyestrain for me, and energy usage for all:
* https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
*/
* {
font-size: 14px;
font-family: monospace;
}
pre {
white-space: pre-wrap;
padding: 10px;
background: #f5f5f5;
}
hr {
margin: 30px 0;
}
body {
max-width: 120ex; /* 120 columns wide */
margin: 50px auto;
}
/*
* Underlined links add visual noise which make them hard-to-read.
* Use colors to make them stand out, instead.
*/
a:link {
color: #007;
text-decoration: none;
}
a:visited {
color:#504;
}
a:hover {
text-decoration: underline;
}
/* quoted text in emails gets a different color */
*.q { color:gray }
/*
* these may be used with cgit <https://git.zx2c4.com/cgit/>, too.
* (cgit uses <div>, public-inbox uses <span>)
*/
*.add { color:darkgreen } /* diff post-image lines */
*.del { color:darkred } /* diff pre-image lines */
*.head { color:black } /* diff header (metainformation) */
*.hunk { color:gray } /* diff hunk-header */
/*
* highlight 3.x colors (tested 3.18) for displaying blobs.
* This doesn't use most of the colors available, as I find too
* many colors overwhelming, so the default is commented out.
*/
.hl.num { color:#f30 } /* number */
.hl.esc { color:#f0f } /* escape character */
.hl.str { color:#f30 } /* string */
.hl.ppc { color:#f0f } /* preprocessor */
.hl.pps { color:#f30 } /* preprocessor string */
.hl.slc { color:#09f } /* single-line comment */
.hl.com { color:#09f } /* multi-line comment */
/* .hl.opt { color:#ccc } */ /* operator */
/* .hl.ipl { color:#ccc } */ /* interpolation */
/* keyword groups kw[a-z] */
.hl.kwa { color:#ff0 }
.hl.kwb { color:#0f0 }
.hl.kwc { color:#ff0 }
/* .hl.kwd { color:#ccc } */
/* line-number (unused by public-inbox) */
/* .hl.lin { color:#ccc } */

View File

@@ -1,47 +0,0 @@
{ lib, ... }:
{
services.public-inbox = {
enable = true;
http = {
enable = true;
port = 8081;
mounts = [ "/lists" ];
};
settings.publicinbox = {
css = [ "${./public-inbox.css}" ];
wwwlisting = "all";
};
inboxes = {
bscpkgs = {
url = "https://jungle.bsc.es/lists/bscpkgs";
address = [ "~rodarima/bscpkgs@lists.sr.ht" ];
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
description = "Patches for bscpkgs";
listid = "~rodarima/bscpkgs.lists.sr.ht";
};
jungle = {
url = "https://jungle.bsc.es/lists/jungle";
address = [ "~rodarima/jungle@lists.sr.ht" ];
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
description = "Patches for jungle";
listid = "~rodarima/jungle.lists.sr.ht";
};
};
};
# We need access to the network for the watch service, as we will fetch the
# emails directly from the IMAP server.
systemd.services.public-inbox-watch.serviceConfig = {
PrivateNetwork = lib.mkForce false;
RestrictAddressFamilies = lib.mkForce [ "AF_UNIX" "AF_INET" "AF_INET6" ];
KillSignal = "SIGKILL"; # Avoid slow shutdown
# Required for chmod(..., 02750) on directories by git, from
# systemd.exec(8):
# > Note that this restricts marking of any type of file system object with
# > these bits, including both regular files and directories (where the SGID
# > is a different meaning than for files, see documentation).
RestrictSUIDSGID = lib.mkForce false;
};
}

View File

@@ -1,15 +1,15 @@
- targets: - targets:
- owl1-ipmi - 10.0.40.101
- owl2-ipmi - 10.0.40.102
- xeon03-ipmi - 10.0.40.103
- xeon04-ipmi - 10.0.40.104
- koro-ipmi - 10.0.40.105
- xeon06-ipmi - 10.0.40.106
- hut-ipmi - 10.0.40.107
- eudy-ipmi - 10.0.40.108
# Storage # Storage
- bay-ipmi - 10.0.40.141
- oss01-ipmi - 10.0.40.142
- lake2-ipmi - 10.0.40.143
labels: labels:
job: ipmi-lan job: ipmi-lan

View File

@@ -2,11 +2,13 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
#(modulesPath + "/installer/netboot/netboot-minimal.nix") #(modulesPath + "/installer/netboot/netboot-minimal.nix")
../eudy/cpufreq.nix ../eudy/cpufreq.nix
../eudy/users.nix ../eudy/users.nix
../eudy/slurm.nix
./users.nix
./kernel.nix ./kernel.nix
]; ];

View File

@@ -1,29 +1,9 @@
{ pkgs, lib, ... }: { pkgs, lib, ... }:
let let
#fcs-devel = pkgs.linuxPackages_custom { kernel = nixos-fcsv4;
# version = "6.2.8";
# src = /mnt/data/kernel/fcs/kernel/src;
# configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig;
#};
#fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false; nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
#fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false;
#fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true;
#fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true;
#fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom {
# version = "6.2.8";
# src = builtins.fetchGit {
# url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
# rev = gitCommit;
# ref = "fcs";
# };
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
#};
kernel = nixos-fcs;
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
version = "6.2.8"; version = "6.2.8";
src = builtins.fetchGit { src = builtins.fetchGit {
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git"; url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
@@ -40,13 +20,27 @@ let
}; };
kernelPatches = []; kernelPatches = [];
extraMeta.branch = lib.versions.majorMinor version; extraMeta.branch = lib.versions.majorMinor version;
})); });
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";}; nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";};
nixos-fcs-lockstat = nixos-fcs.override { nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";};
nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";};
nixos-fcsv4 = nixos-fcs-kernel {gitCommit = "c94c3d946f33ac3e5782a02ee002cc1164c0cb4f";};
nixos-fcsv1-lockstat = nixos-fcs-kernel {
gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";
lockStat = true; lockStat = true;
}; };
nixos-fcs-lockstat-preempt = nixos-fcs.override { nixos-fcsv2-lockstat = nixos-fcs-kernel {
gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";
lockStat = true;
};
nixos-fcsv3-lockstat = nixos-fcs-kernel {
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
lockStat = true;
};
nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel {
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
lockStat = true; lockStat = true;
preempt = true; preempt = true;
}; };
@@ -66,5 +60,5 @@ in {
# enable memory overcommit, needed to build a taglibc system using nix after # enable memory overcommit, needed to build a taglibc system using nix after
# increasing the openblas memory footprint # increasing the openblas memory footprint
boot.kernel.sysctl."vm.overcommit_memory" = 1; boot.kernel.sysctl."vm.overcommit_memory" = lib.mkForce 1;
} }

17
m/koro/users.nix Normal file
View File

@@ -0,0 +1,17 @@
{ ... }:
{
users.users = {
vlopez = {
uid = 4334;
isNormalUser = true;
home = "/home/Computational/vlopez";
description = "Victor López";
group = "Computational";
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
];
};
};
}

View File

@@ -2,20 +2,20 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
../module/monitoring.nix ../common/monitoring.nix
]; ];
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
boot.kernel.sysctl = {
"kernel.yama.ptrace_scope" = lib.mkForce "1";
};
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
ceph ceph
]; ];
services.slurm = {
client.enable = lib.mkForce false;
};
services.ceph = { services.ceph = {
enable = true; enable = true;
global = { global = {
@@ -49,16 +49,6 @@
address = "10.0.42.42"; address = "10.0.42.42";
prefixLength = 24; prefixLength = 24;
} ]; } ];
firewall = {
extraCommands = ''
# Accept all incoming TCP traffic from bay
iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept
# Accept monitoring requests from hut
iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept
# Accept all Ceph traffic from the local network
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
'';
};
}; };
# Missing service for volumes, see: # Missing service for volumes, see:

View File

@@ -1,70 +0,0 @@
{
# In physical order from top to bottom (see note below)
ssf = {
# Switches for Ethernet and OmniPath
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
switch-opa = { pos=41; size=1; };
# SSF login
ssfhead = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="operations@bsc.es"; };
# Storage
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
# Compute xeon
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
# Slot 34 empty
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
# 16 KNL nodes, 4 per chassis
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
# Slot 19 empty
# EPI (hw team, guessed order)
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
# These are old and decommissioned (off)
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
};
bsc2218 = {
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
};
upc = {
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
};
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
# bottom. Example:
#
# | ... | - [pos+size] <--- Label in chassis
# +--------+
# | node | - [pos+1]
# | 2U | - [pos]
# +------- +
# | ... | - [pos-1]
#
# NOTE: The board and sn refers to the FRU information (Board Product and
# Board Serial) via `ipmitool fru print 0`.
}

View File

@@ -3,6 +3,7 @@
# Mounts the /ceph filesystem at boot # Mounts the /ceph filesystem at boot
{ {
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
ceph
ceph-client ceph-client
fio # For benchmarks fio # For benchmarks
]; ];

View File

@@ -1,3 +0,0 @@
{
services.nixseparatedebuginfod.enable = true;
}

View File

@@ -1,3 +0,0 @@
{
boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
}

View File

@@ -1,10 +0,0 @@
{ config, ... }:
{
nix.settings =
# Don't add hut as a cache to itself
assert config.networking.hostName != "hut";
{
substituters = [ "http://hut/cache" ];
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
}

View File

@@ -1,24 +0,0 @@
{ config, lib, ... }:
with lib;
{
options = {
users.jungleUsers = mkOption {
type = types.attrsOf (types.anything // { check = (x: x ? "hosts"); });
description = ''
Same as users.users but with the extra `hosts` attribute, which controls
access to the nodes by `networking.hostName`.
'';
};
};
config = let
allowedUser = host: userConf: builtins.elem host userConf.hosts;
filterUsers = host: users: filterAttrs (n: v: allowedUser host v) users;
removeHosts = users: mapAttrs (n: v: builtins.removeAttrs v [ "hosts" ]) users;
currentHost = config.networking.hostName;
in {
users.users = removeHosts (filterUsers currentHost config.users.jungleUsers);
};
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-meteocat-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
};
};
}

View File

@@ -0,0 +1,69 @@
{ ... }:
{
# Don't make the nix store read-only, as this would prevent the overlay FS
# from being able to mount it.
boot.readOnlyNixStore = false;
# The nix-daemon.socket has an unnecessary dependency over the /nix/store
# mount point. But that mount point won't be provided until the network is
# ready. However, the network-address-eno1.service, has a dependency over
# sockets.target, causing a cycle.
# One solution is to make the nix-daemon.socket depend only on the socket
# patch (which is already covered by ConditionPathIsReadWrite =
# /nix/var/nix/daemon-socket), instead on the /nix/store.
#
# Using systemd.sockets.nix-daemon.unitConfig.RequiresMountsFor =
# "/nix/var/nix/daemon-socket" doesn't work, as the the mount options get
# added by systemd when the override config is merged with the one that Nix
# provides:
#
# owl2% sudo systemctl show nix-daemon.socket | grep RequiresMountsFor
# RequiresMountsFor=/nix/store /nix/var/nix/daemon-socket/socket /nix/var/nix/daemon-socket
#
# To fix this, the Nix package is patched to only depend on /nix/var instead.
# See ../../pkgs/overlay.nix for details.
# Mount the hut nix store via NFS in read-only mode.
fileSystems."/mnt/hut-nix-store" = {
device = "hut:/nix/store";
fsType = "nfs";
options = [ "ro" ];
};
# A workdir is also needed, so setup a permanent dir using tmpfiles.
systemd.tmpfiles.rules = [
"d /mnt/nix-work 0700 root root -"
];
# Mount an overlay in /nix/store using as lower layer the NFS store and upper
# layer the disk nix store. The destination is still the nix store in
# /nix/store (confusing). We need rw access, as the daemon need to write the
# lock files to build derivations locally. Use a systemd mount unit directly
# so we can specify the LazyUmount option and we avoid having it mounted
# in the stage1 before systemd.
systemd.mounts = [
{
what = "overlay";
type = "overlay";
where = "/nix/store";
# We need the local-fs.target to be ready, so the network interfaces can
# be configured to the network.target is reached. So make this a netdev
# mount.
options = "_netdev,lowerdir=/mnt/hut-nix-store,upperdir=/nix/store,workdir=/mnt/nix-work";
description = "Overlay /nix/store mount";
mountConfig = {
LazyUnmount = true;
};
# Run the unit after remote-fs-pre.target but before the remote-fs.target
after = [ "remote-fs-pre.target"];
before = [ "umount.target" "remote-fs.target" ];
# Install by using wantedBy over remote-fs.target
wantedBy = [ "remote-fs.target" ];
unitConfig = {
# We need to wait for the NFS mount
RequiresMountsFor = "/nix/store /mnt/hut-nix-store";
};
}
];
}

View File

@@ -1,28 +0,0 @@
{ config, lib, pkgs, ... }:
# See also: https://github.com/NixOS/nixpkgs/pull/112010
# And: https://github.com/NixOS/nixpkgs/pull/115839
with lib;
{
systemd.services."prometheus-slurm-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = ''
${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341"
'';
Environment = [
"PATH=${pkgs.slurm}/bin"
# We need to specify the slurm config to be able to talk to the slurmd
# daemon.
"SLURM_CONF=${config.services.slurm.etcSlurm}/slurm.conf"
];
};
};
}

View File

@@ -1,19 +0,0 @@
{ ... }:
{
# Mount the hut nix store via NFS
fileSystems."/mnt/hut-nix-store" = {
device = "hut:/nix/store";
fsType = "nfs";
options = [ "ro" ];
};
systemd.services.slurmd.serviceConfig = {
# When running a job, bind the hut store in /nix/store so the paths are
# available too.
# FIXME: This doesn't keep the programs in /run/current-system/sw/bin
# available in the store. Ideally they should be merged but the overlay FS
# doesn't work when the underlying directories change.
BindReadOnlyPaths = "/mnt/hut-nix-store:/nix/store";
};
}

View File

@@ -1,9 +0,0 @@
{
programs.ssh.extraConfig = ''
Host ssfhead
HostName ssflogin.bsc.es
Host hut
ProxyJump ssfhead
HostName xeon07
'';
}

View File

@@ -1,17 +0,0 @@
{ config, lib, pkgs, ... }:
with lib;
{
systemd.services."prometheus-upc-qaire-exporter" = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
Restart = mkDefault "always";
PrivateTmp = mkDefault true;
WorkingDirectory = mkDefault "/tmp";
DynamicUser = mkDefault true;
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
};
};
}

View File

@@ -2,13 +2,10 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
../module/ceph.nix ../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix ../module/slurm-firewall.nix
../module/debuginfod.nix ../module/shared-nix-store.nix
../module/hut-substituter.nix
]; ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches

View File

@@ -2,13 +2,10 @@
{ {
imports = [ imports = [
../common/ssf.nix ../common/main.nix
../module/ceph.nix ../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix ../module/slurm-firewall.nix
../module/debuginfod.nix ../module/shared-nix-store.nix
../module/hut-substituter.nix
]; ];
# Select the this using the ID to avoid mismatches # Select the this using the ID to avoid mismatches

View File

@@ -1,100 +0,0 @@
{ config, pkgs, lib, modulesPath, ... }:
{
imports = [
../common/base.nix
../module/emulation.nix
../module/debuginfod.nix
../module/ssh-hut-extern.nix
../eudy/kernel/perf.nix
];
# Don't install Grub on the disk yet
boot.loader.grub.device = "nodev";
# Enable serial console
boot.kernelParams = [
"console=tty1"
"console=ttyS1,115200"
];
networking = {
hostName = "raccoon";
# Only BSC DNSs seem to be reachable from the office VLAN
nameservers = [ "84.88.52.35" "84.88.52.36" ];
defaultGateway = "84.88.51.129";
interfaces.eno0.ipv4.addresses = [ {
address = "84.88.51.152";
prefixLength = 25;
} ];
interfaces.enp5s0f1.ipv4.addresses = [ {
address = "10.0.44.1";
prefixLength = 24;
} ];
nat = {
enable = true;
internalInterfaces = [ "enp5s0f1" ];
externalInterface = "eno0";
};
hosts = {
"10.0.44.4" = [ "tent" ];
};
};
nix.settings = {
substituters = [ "https://jungle.bsc.es/cache" ];
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
# Enable performance governor
powerManagement.cpuFreqGovernor = "performance";
# Configure Nvidia driver to use with CUDA
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
hardware.graphics.enable = true;
nixpkgs.config.allowUnfree = true;
nixpkgs.config.nvidia.acceptLicense = true;
services.xserver.videoDrivers = [ "nvidia" ];
# Disable garbage collection for now
nix.gc.automatic = lib.mkForce false;
services.openssh.settings.X11Forwarding = true;
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
users.motd = ''
DO YOU BRING FEEDS?
'';
}

View File

@@ -1,70 +0,0 @@
{ config, pkgs, lib, ... }:
{
imports = [
../common/xeon.nix
../module/emulation.nix
../module/debuginfod.nix
../module/ssh-hut-extern.nix
];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
networking = {
hostName = "tent";
interfaces.eno1.ipv4.addresses = [
{
address = "10.0.44.4";
prefixLength = 24;
}
];
# Only BSC DNSs seem to be reachable from the office VLAN
nameservers = [ "84.88.52.35" "84.88.52.36" ];
defaultGateway = "10.0.44.1";
};
nix.settings = {
substituters = [ "https://jungle.bsc.es/cache" ];
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
services.prometheus.exporters.node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
listenAddress = "127.0.0.1";
};
boot.swraid = {
enable = true;
mdadmConf = ''
DEVICE partitions
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
MAILADDR root
'';
};
fileSystems."/vault" = {
device = "/dev/disk/by-label/vault";
fsType = "ext4";
};
# Make a /vault/$USER directory for each user.
systemd.services.create-vault-dirs = let
# Take only normal users in tent
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
commands = lib.concatLists (lib.mapAttrsToList
(_: user: [
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/${user.name}"
]) users);
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
in {
enable = true;
wants = [ "local-fs.target" ];
after = [ "local-fs.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = script;
};
}

405
pkgs/ceph.nix Normal file
View File

@@ -0,0 +1,405 @@
{ lib
, stdenv
, runCommand
, fetchurl
, fetchFromGitHub
, fetchPypi
# Build time
, cmake
, ensureNewerSourcesHook
, fmt
, git
, makeWrapper
, nasm
, pkg-config
, which
# Tests
, nixosTests
# Runtime dependencies
, arrow-cpp
, babeltrace
, boost179
, bzip2
, cryptsetup
, cunit
, doxygen
, gperf
, graphviz
, gtest
, icu
, libcap
, libcap_ng
, libnl
, libxml2
, lttng-ust
, lua
, lz4
, oath-toolkit
, openldap
, python310
, rdkafka
, rocksdb
, snappy
, sqlite
, utf8proc
, zlib
, zstd
# Optional Dependencies
, curl ? null
, expat ? null
, fuse ? null
, libatomic_ops ? null
, libedit ? null
, libs3 ? null
, yasm ? null
# Mallocs
, gperftools ? null
, jemalloc ? null
# Crypto Dependencies
, cryptopp ? null
, nspr ? null
, nss ? null
# Linux Only Dependencies
, linuxHeaders
, util-linux
, libuuid
, udev
, keyutils
, rdma-core
, rabbitmq-c
, libaio ? null
, libxfs ? null
, liburing ? null
, zfs ? null
, ...
}:
# We must have one crypto library
assert cryptopp != null || (nss != null && nspr != null);
let
shouldUsePkg = pkg: if pkg != null && pkg.meta.available then pkg else null;
optYasm = shouldUsePkg yasm;
optExpat = shouldUsePkg expat;
optCurl = shouldUsePkg curl;
optFuse = shouldUsePkg fuse;
optLibedit = shouldUsePkg libedit;
optLibatomic_ops = shouldUsePkg libatomic_ops;
optLibs3 = shouldUsePkg libs3;
optJemalloc = shouldUsePkg jemalloc;
optGperftools = shouldUsePkg gperftools;
optCryptopp = shouldUsePkg cryptopp;
optNss = shouldUsePkg nss;
optNspr = shouldUsePkg nspr;
optLibaio = shouldUsePkg libaio;
optLibxfs = shouldUsePkg libxfs;
optZfs = shouldUsePkg zfs;
# Downgrade rocksdb, 7.10 breaks ceph
rocksdb' = rocksdb.overrideAttrs {
version = "7.9.2";
src = fetchFromGitHub {
owner = "facebook";
repo = "rocksdb";
rev = "refs/tags/v7.9.2";
hash = "sha256-5P7IqJ14EZzDkbjaBvbix04ceGGdlWBuVFH/5dpD5VM=";
};
};
hasRadosgw = optExpat != null && optCurl != null && optLibedit != null;
# Malloc implementation (can be jemalloc, tcmalloc or null)
malloc = if optJemalloc != null then optJemalloc else optGperftools;
# We prefer nss over cryptopp
cryptoStr = if optNss != null && optNspr != null then "nss" else
if optCryptopp != null then "cryptopp" else "none";
cryptoLibsMap = {
nss = [ optNss optNspr ];
cryptopp = [ optCryptopp ];
none = [ ];
};
getMeta = description: with lib; {
homepage = "https://ceph.io/en/";
inherit description;
license = with licenses; [ lgpl21 gpl2 bsd3 mit publicDomain ];
maintainers = with maintainers; [ adev ak johanot krav ];
platforms = [ "x86_64-linux" "aarch64-linux" ];
};
ceph-common = with python.pkgs; buildPythonPackage {
pname = "ceph-common";
inherit src version;
sourceRoot = "ceph-${version}/src/python-common";
propagatedBuildInputs = [
pyyaml
];
nativeCheckInputs = [
pytestCheckHook
];
disabledTests = [
# requires network access
"test_valid_addr"
];
meta = getMeta "Ceph common module for code shared by manager modules";
};
# Watch out for python <> boost compatibility
python = python310.override {
packageOverrides = self: super: {
sqlalchemy = super.sqlalchemy.overridePythonAttrs rec {
version = "1.4.46";
src = fetchPypi {
pname = "SQLAlchemy";
inherit version;
hash = "sha256-aRO4JH2KKS74MVFipRkx4rQM6RaB8bbxj2lwRSAMSjA=";
};
disabledTestPaths = [
"test/aaa_profiling"
"test/ext/mypy"
];
};
};
};
boost = boost179.override {
enablePython = true;
inherit python;
};
# TODO: split this off in build and runtime environment
ceph-python-env = python.withPackages (ps: with ps; [
ceph-common
# build time
cython
# debian/control
bcrypt
cherrypy
influxdb
jinja2
kubernetes
natsort
numpy
pecan
prettytable
pyjwt
pyopenssl
python-dateutil
pyyaml
requests
routes
scikit-learn
scipy
setuptools
sphinx
virtualenv
werkzeug
# src/pybind/mgr/requirements-required.txt
cryptography
jsonpatch
# src/tools/cephfs/shell/setup.py
cmd2
colorama
]);
inherit (ceph-python-env.python) sitePackages;
version = "18.2.0";
src = fetchurl {
url = "https://download.ceph.com/tarballs/ceph-${version}.tar.gz";
hash = "sha256:0k9nl6xi5brva51rr14m7ig27mmmd7vrpchcmqc40q3c2khn6ns9";
};
in rec {
ceph = stdenv.mkDerivation {
pname = "ceph";
inherit src version;
nativeBuildInputs = [
cmake
fmt
git
makeWrapper
nasm
pkg-config
python
python.pkgs.python # for the toPythonPath function
python.pkgs.wrapPython
which
(ensureNewerSourcesHook { year = "1980"; })
# for building docs/man-pages presumably
doxygen
graphviz
];
enableParallelBuilding = true;
buildInputs = cryptoLibsMap.${cryptoStr} ++ [
arrow-cpp
babeltrace
boost
bzip2
ceph-python-env
cryptsetup
cunit
gperf
gtest
icu
libcap
libnl
libxml2
lttng-ust
lua
lz4
malloc
oath-toolkit
openldap
optLibatomic_ops
optLibs3
optYasm
rdkafka
rocksdb'
snappy
sqlite
utf8proc
zlib
zstd
] ++ lib.optionals stdenv.isLinux [
keyutils
libcap_ng
liburing
libuuid
linuxHeaders
optLibaio
optLibxfs
optZfs
rabbitmq-c
rdma-core
udev
util-linux
] ++ lib.optionals hasRadosgw [
optCurl
optExpat
optFuse
optLibedit
];
pythonPath = [ ceph-python-env "${placeholder "out"}/${ceph-python-env.sitePackages}" ];
preConfigure =''
substituteInPlace src/common/module.c --replace "/sbin/modinfo" "modinfo"
substituteInPlace src/common/module.c --replace "/sbin/modprobe" "modprobe"
substituteInPlace src/common/module.c --replace "/bin/grep" "grep"
# install target needs to be in PYTHONPATH for "*.pth support" check to succeed
# set PYTHONPATH, so the build system doesn't silently skip installing ceph-volume and others
export PYTHONPATH=${ceph-python-env}/${sitePackages}:$lib/${sitePackages}:$out/${sitePackages}
patchShebangs src/
'';
cmakeFlags = [
"-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib"
"-DWITH_CEPHFS_SHELL:BOOL=ON"
"-DWITH_SYSTEMD:BOOL=OFF"
# `WITH_JAEGER` requires `thrift` as a depenedncy (fine), but the build fails with:
# CMake Error at src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-Release.cmake:49 (message):
# Command failed: 2
#
# 'make' 'opentelemetry_trace' 'opentelemetry_exporter_jaeger_trace'
#
# See also
#
# /build/ceph-18.2.0/build/src/opentelemetry-cpp/src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-*.log
# and that file contains:
# /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc: In member function 'virtual void opentelemetry::v1::exporter::jaeger::TUDPTransport::close()':
# /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc:71:7: error: '::close' has not been declared; did you mean 'pclose'?
# 71 | ::THRIFT_CLOSESOCKET(socket_);
# | ^~~~~~~~~~~~~~~~~~
# Looks like `close()` is somehow not included.
# But the relevant code is already removed in `open-telemetry` 1.10: https://github.com/open-telemetry/opentelemetry-cpp/pull/2031
# So it's proably not worth trying to fix that for this Ceph version,
# and instead just disable Ceph's Jaeger support.
"-DWITH_JAEGER:BOOL=OFF"
"-DWITH_TESTS:BOOL=OFF"
# Use our own libraries, where possible
"-DWITH_SYSTEM_ARROW:BOOL=ON" # Only used if other options enable Arrow support.
"-DWITH_SYSTEM_BOOST:BOOL=ON"
"-DWITH_SYSTEM_GTEST:BOOL=ON"
"-DWITH_SYSTEM_ROCKSDB:BOOL=ON"
"-DWITH_SYSTEM_UTF8PROC:BOOL=ON"
"-DWITH_SYSTEM_ZSTD:BOOL=ON"
# TODO breaks with sandbox, tries to download stuff with npm
"-DWITH_MGR_DASHBOARD_FRONTEND:BOOL=OFF"
# WITH_XFS has been set default ON from Ceph 16, keeping it optional in nixpkgs for now
''-DWITH_XFS=${if optLibxfs != null then "ON" else "OFF"}''
] ++ lib.optional stdenv.isLinux "-DWITH_SYSTEM_LIBURING=ON";
postFixup = ''
wrapPythonPrograms
wrapProgram $out/bin/ceph-mgr --prefix PYTHONPATH ":" "$(toPythonPath ${placeholder "out"}):$(toPythonPath ${ceph-python-env})"
# Test that ceph-volume exists since the build system has a tendency to
# silently drop it with misconfigurations.
test -f $out/bin/ceph-volume
'';
outputs = [ "out" "lib" "dev" "doc" "man" ];
doCheck = false; # uses pip to install things from the internet
# Takes 7+h to build with 2 cores.
requiredSystemFeatures = [ "big-parallel" ];
meta = getMeta "Distributed storage system";
passthru = {
inherit version;
tests = {
inherit (nixosTests)
ceph-multi-node
ceph-single-node
ceph-single-node-bluestore;
};
};
};
ceph-client = runCommand "ceph-client-${version}" {
meta = getMeta "Tools needed to mount Ceph's RADOS Block Devices/Cephfs";
} ''
mkdir -p $out/{bin,etc,${sitePackages},share/bash-completion/completions}
cp -r ${ceph}/bin/{ceph,.ceph-wrapped,rados,rbd,rbdmap} $out/bin
cp -r ${ceph}/bin/ceph-{authtool,conf,dencoder,rbdnamer,syn} $out/bin
cp -r ${ceph}/bin/rbd-replay* $out/bin
cp -r ${ceph}/sbin/mount.ceph $out/bin
cp -r ${ceph}/sbin/mount.fuse.ceph $out/bin
ln -s bin $out/sbin
cp -r ${ceph}/${sitePackages}/* $out/${sitePackages}
cp -r ${ceph}/etc/bash_completion.d $out/share/bash-completion/completions
# wrapPythonPrograms modifies .ceph-wrapped, so lets just update its paths
substituteInPlace $out/bin/ceph --replace ${ceph} $out
substituteInPlace $out/bin/.ceph-wrapped --replace ${ceph} $out
'';
}

View File

@@ -1,25 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "meteocat-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
beautifulsoup4
lxml
prometheus-client
];
meta = with lib; {
description = "MeteoCat Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
from bs4 import BeautifulSoup
from urllib import request
# Configuration -------------------------------------------
meteo_station = "X8" # Barcelona - Zona Universitària
listening_port = 9929
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
def update(st):
url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
response = request.urlopen(url)
data = response.read()
soup = BeautifulSoup(data, 'lxml')
table = soup.find("table", {"class" : "tblperiode"})
rows = table.find_all('tr')
row = rows[-1] # Take the last row
row_data = []
header = row.find('th')
header_text = header.text.strip()
row_data.append(header_text)
for col in row.find_all('td'):
row_data.append(col.text)
try:
# Sometimes it will return '(s/d)' and fail to parse
metric_tavg.set(float(row_data[1]))
metric_tmax.set(float(row_data[2]))
metric_tmin.set(float(row_data[3]))
metric_srad.set(float(row_data[10]))
#print("ok: temp_avg={}".format(float(row_data[1])))
except:
print("cannot parse row: {}".format(row))
metric_tavg.set(float("nan"))
metric_tmax.set(float("nan"))
metric_tmin.set(float("nan"))
metric_srad.set(float("nan"))
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
update(meteo_station)
except:
print("update failed")
time.sleep(update_period)

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='meteocat-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["meteocat-exporter"],
)

View File

@@ -1,36 +0,0 @@
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
index 33e88bc..ee3641c 100644
--- a/src/util/mpir_hwtopo.c
+++ b/src/util/mpir_hwtopo.c
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
#ifdef HAVE_HWLOC
bindset = hwloc_bitmap_alloc();
hwloc_topology_init(&hwloc_topology);
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
- if (xmlfile != NULL) {
- int rc;
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
- if (rc == 0) {
- /* To have hwloc still actually call OS-specific hooks, the
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
- * file is really the underlying system. */
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
- }
- MPL_free(xmlfile);
- }
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
if (!hwloc_topology_load(hwloc_topology))
--- a/src/mpi/init/local_proc_attrs.c
+++ b/src/mpi/init/local_proc_attrs.c
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
/* Set the number of tag bits. The device may override this value. */
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
- MPL_free(requested_kinds);
-
return mpi_errno;
}

11
pkgs/nix-socket.patch Normal file
View File

@@ -0,0 +1,11 @@
--- a/misc/systemd/nix-daemon.socket.in 1970-01-01 01:00:01.000000000 +0100
+++ b/misc/systemd/nix-daemon.socket.in 2023-09-18 17:53:32.351760208 +0200
@@ -1,7 +1,7 @@
[Unit]
Description=Nix Daemon Socket
Before=multi-user.target
-RequiresMountsFor=@storedir@
+RequiresMountsFor=@localstatedir@
ConditionPathIsReadWrite=@localstatedir@/nix/daemon-socket
[Socket]

View File

@@ -1,59 +1,39 @@
final: prev: final: prev:
{ {
# Set MPICH as default bsc = prev.bsc.extend (bscFinal: bscPrev: {
mpi = final.mpich; # Set MPICH as default
mpi = bscFinal.mpich;
# Configure the network for MPICH # Configure the network for MPICH
mpich = with final; let mpich = with final; prev.mpich.overrideAttrs (old: {
# pmix comes with the libraries in .out and headers in .dev buildInput = old.buildInputs ++ [
pmixAll = symlinkJoin { libfabric
name = "pmix-all"; pmix
paths = [ pmix.dev pmix.out ]; ];
}; configureFlags = [
in prev.mpich.overrideAttrs (old: { "--enable-shared"
patches = (old.patches or []) ++ [ "--enable-sharedlib"
# See https://github.com/pmodels/mpich/issues/6946 "--with-pm=no"
./mpich-fix-hwtopo.patch "--with-device=ch4:ofi"
]; "--with-pmi=pmix"
buildInput = old.buildInputs ++ [ "--with-pmix=${final.pmix}"
libfabric "--with-libfabric=${final.libfabric}"
pmixAll "--enable-g=log"
]; ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
configureFlags = [ "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
"--enable-shared" "FCFLAGS=-fallow-argument-mismatch"
"--enable-sharedlib" ];
"--with-pm=no" });
"--with-device=ch4:ofi"
"--with-pmi=pmix"
"--with-pmix=${pmixAll}"
"--with-libfabric=${libfabric}"
"--enable-g=log"
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
"FCFLAGS=-fallow-argument-mismatch"
];
}); });
slurm = prev.slurm.overrideAttrs (old: { # Update ceph to 18.2.0 until it lands in nixpkgs, see:
patches = (old.patches or []) ++ [ # https://github.com/NixOS/nixpkgs/pull/247849
# See https://bugs.schedmd.com/show_bug.cgi?id=19324 inherit (prev.callPackage ./ceph.nix {
./slurm-rank-expansion.patch lua = prev.lua5_4;
]; fmt = prev.fmt_8;
# Install also the pam_slurm_adopt library to restrict users from accessing }) ceph ceph-client;
# nodes with no job allocated.
postBuild = (old.postBuild or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security"
popd
'';
postInstall = (old.postInstall or "") + ''
pushd contribs/pam_slurm_adopt
make "PAM_DIR=$out/lib/security" install
popd
'';
});
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; nix = prev.nix.overrideAttrs (old: {
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; patches = old.patches ++ [ ./nix-socket.patch ];
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; });
} }

View File

@@ -1,22 +0,0 @@
{ buildGoModule, fetchFromGitHub, lib }:
buildGoModule rec {
pname = "prometheus-slurm-exporter";
version = "0.20";
src = fetchFromGitHub {
rev = version;
owner = "vpenso";
repo = pname;
sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs=";
};
vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI=";
doCheck = false;
meta = with lib; {
description = "Prometheus SLURM Exporter";
homepage = "https://github.com/vpenso/prometheus-slurm-exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,11 +0,0 @@
--- a/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:05:24.815313882 +0100
+++ b/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:09:53.936900823 +0100
@@ -314,7 +314,7 @@ static void _dmdx_req(buf_t *buf, int no
}
nsptr = pmixp_nspaces_local();
- if (nsptr->ntasks <= rank) {
+ if ((long) nsptr->ntasks <= (long) rank) {
char *nodename = pmixp_info_job_host(nodeid);
PMIXP_ERROR("Bad request from %s: nspace \"%s\" has only %d ranks, asked for %d",
nodename, ns, nsptr->ntasks, rank);

View File

@@ -1,24 +0,0 @@
{ python3Packages, lib }:
python3Packages.buildPythonApplication rec {
pname = "upc-qaire-exporter";
version = "1.0";
src = ./.;
doCheck = false;
build-system = with python3Packages; [
setuptools
];
dependencies = with python3Packages; [
prometheus-client
requests
];
meta = with lib; {
description = "UPC Qaire Prometheus Exporter";
platforms = platforms.linux;
};
}

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(name='upc-qaire-exporter',
version='1.0',
# Modules to import from other scripts:
packages=find_packages(),
# Executables
scripts=["upc-qaire-exporter"],
)

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env python3
import time
from prometheus_client import start_http_server, Gauge
import requests, json
from datetime import datetime, timedelta
# Configuration -------------------------------------------
listening_port = 9928
update_period = 60 * 5 # Each 5 min
# ---------------------------------------------------------
metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
def genparams():
d = {}
d['topic'] = 'TEMPERATURE'
d['shift_dates_to'] = ''
d['datapoints'] = 301
d['devicesAndColors'] = '1148418@@@#40ACB6'
now = datetime.now()
d['fromDate'] = now.strftime('%d/%m/%Y')
d['toDate'] = now.strftime('%d/%m/%Y')
d['serviceFrequency'] = 'NONE'
# WTF!
for i in range(7):
for j in range(48):
key = 'week.days[{}].hours[{}].value'.format(i, j)
d[key] = 'OPEN'
return d
def measure():
# First we need to load session
s = requests.Session()
r = s.get("https://upc.edu/sirena")
if r.status_code != 200:
print("bad HTTP status code on new session: {}".format(r.status_code))
return
if s.cookies.get("JSESSIONID") is None:
print("cannot get JSESSIONID")
return
# Now we can pull the data
url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
r = s.post(url, data=genparams())
if r.status_code != 200:
print("bad HTTP status code on data: {}".format(r.status_code))
return
#print(r.text)
j = json.loads(r.content)
# Just take the last one
last = j['data']['chartElementList'][-1]
temp = last['values']['1148418-Temperatura']
return temp
if __name__ == '__main__':
start_http_server(port=listening_port, addr="localhost")
while True:
try:
metric_temp.set(measure())
except:
print("measure failed")
metric_temp.set(float("nan"))
time.sleep(update_period)

Binary file not shown.

Binary file not shown.

View File

@@ -1,10 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg XPOFoZqY+AnKC77jrgNqAm1ADphurfuhO4NRrfiuUDc
iCfMMpGHyaYHGy6ci8sqjUtcPeteLlyvLGEF79VPOEc
-> ssh-ed25519 CAWG4Q 6OsGrnM+/c5lTN81Rvp166K+ygmSIFeSYzXxYg25KGE
Av1zTw2zK4Gufzti9kQaye7C362GCiDRRHzCqBLR33g
-> ssh-ed25519 MSF3dg 8CHqJ7mEDvjvqbmF+eE6Em1Wi6eHAzEUpiExC1gm7S0
bdwzYHw3RAbdHq+RsiFUP++sQ586VUlSnAzAOhiQUjI
--- gA5XSUfjUBol938sC5DbUf8PvQUIr2pNkS2nL95OF9c
<EFBFBD><EFBFBD><EFBFBD>Ea1G7<47><37>ݩ[R<><52>\{~$Go<47>cQ<63>wKP&<26><><EFBFBD>w<EFBFBD><77><EFBFBD>6]
<EFBFBD><EFBFBD>ѣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>^z<7A>̄ 1k<31><6B><03><><EFBFBD><EFBFBD><EFBFBD>Y<EFBFBD><59>2<15>p<>2<EFBFBD><32>K<EFBFBD><4B><EFBFBD><EFBFBD>nok<>/X<><58>pt''<27><>$0co=<3D><EFBFBD>

View File

@@ -1,9 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg pXNTB/ailRwSEJG1pXvrzzpz5HqkDZdWVWnOH7JGeQ4
NzA+2fxfkNRy/u+Zq96A02K1Vxy0ETYZjMkDVTKyCY8
-> ssh-ed25519 CAWG4Q 7CLJWn+EAxoWDduXaOSrHaBFHQ4GIpYP/62FFTj3ZTI
vSYV1pQg2qI2ngCzM0nCZAnqdz1tbT4hM5m+/TyGU2c
-> ssh-ed25519 MSF3dg Akmp4NcZcDuaYHta/Vej6zulNSrAOCd5lmSV+OiBGC4
qTxqVzTyywur+GjtUQdbaIUdH1fqCqPe6qPf8iHRa4w
--- uCKNqD1TmZZThOzlpsecBKx/k+noIWhCVMr/pzNwBr8
r'<27>Ƌs4<15>˺<EFBFBD><41>P<05>L<EFBFBD><4C>7` <09><02><>) H<><48>-<2D>0<EFBFBD>AH<41>5<16><><1F><>L<EFBFBD>Qe<51><65>H2b<32><1D><42>CJG<4A>"-S<><1C>\<5C><> <0C>H<n<>V

View File

@@ -1,9 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg s6iI9f25xulF4KXt+XY07kXXPKxXo7f2Ql/OTHN55Hk
WO4Fd2H9c+HL3+XhUF3BmEZVILlcchGxSrSmL2OEdGw
-> ssh-ed25519 CAWG4Q TBkdpx8k8K1NvW3wcvaF7omKFwEJ2DxWJp3tIOTjwCA
LcYgWRix23AQnw0OQ7f8+8S3J84CHUElX1vKZSETiLE
-> ssh-ed25519 MSF3dg WzrF8kjTP7BXXDjmUp7kPCKguthAW12RPo6Vy2RMmh4
8C3mT9ktudCTANDxhyNszUkbeDG6X4wOJdx825++dYM
--- /w3YQ2UeTi67H1JR0GsdPz2KoLN2Y7BIZfFY+//AWjY
<EFBFBD>ӣ-`P<>@<40><>ބ<><DE84><EFBFBD>)9<>9l<><6C> <0C><>Zf<5A><66>V?I>΍<>w鉐<77> z40 <20>2{i@<40>Z<EFBFBD><5A>x<EFBFBD><78>AHn<48>%<25><> <0C>ʤ<>/W<><57>Ĕ<0F>l<EFBFBD><6C><EFBFBD><EFBFBD>}<7D>&<08><EFBFBD>(<28><><EFBFBD>K<EFBFBD><4B><EFBFBD>S<EFBFBD><53>o<EFBFBD>z<EFBFBD>=<1F>d

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

11
secrets/nosv-token.age Normal file
View File

@@ -0,0 +1,11 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE
avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro
-> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA
FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70
-> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o
BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8
-> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t
oLG+0S1aGfO/ohCfgGmhDhwwLi4H
--- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI
<EFBFBD><EFBFBD>W<EFBFBD>o<> <14><>d;<3B><>C<EFBFBD>.<2E><>_(<28>u

BIN
secrets/ovni-token.age Normal file

Binary file not shown.

View File

@@ -6,13 +6,9 @@ let
safe = keys.hostGroup.safe ++ adminsKeys; safe = keys.hostGroup.safe ++ adminsKeys;
in in
{ {
"gitea-runner-token.age".publicKeys = hut; "ovni-token.age".publicKeys = hut;
"gitlab-runner-docker-token.age".publicKeys = hut; "nosv-token.age".publicKeys = hut;
"gitlab-runner-shell-token.age".publicKeys = hut;
"gitlab-bsc-docker-token.age".publicKeys = hut;
"nix-serve.age".publicKeys = hut; "nix-serve.age".publicKeys = hut;
"jungle-robot-password.age".publicKeys = hut;
"ipmi.yml.age".publicKeys = hut;
"ceph-user.age".publicKeys = safe; "ceph-user.age".publicKeys = safe;
"munge-key.age".publicKeys = safe; "munge-key.age".publicKeys = safe;

View File

@@ -11,7 +11,7 @@ access to the login machine using a resource petition in the BSC intranet.
Then, to request access to the machines we will need some information about you: Then, to request access to the machines we will need some information about you:
1. Which machines you want access to ([hut](/hut), [fox](/fox), owl1, owl2, eudy, koro...) 1. Which machines you want access to (hut, owl1, owl2, eudy, koro...)
1. Your user name and user id (to match the NFS permissions) 1. Your user name and user id (to match the NFS permissions)
1. Your real name and surname (for identification purposes) 1. Your real name and surname (for identification purposes)
1. The salted hash of your login password, generated with `mkpasswd -m sha-512` 1. The salted hash of your login password, generated with `mkpasswd -m sha-512`

View File

@@ -1,10 +0,0 @@
---
title: "Docs"
description: "Documentation for users of jungle machines"
date: 2023-09-15
---
If this is the first time you use any of the jungle machines with NixOS, follow
the [quick start guide](quickstart).

View File

@@ -1,234 +0,0 @@
---
title: "Quick start"
date: 2023-09-15
---
This documentation will guide you on how to build custom packages of software
and use them in the jungle machines. It has been designed to reduce the friction
from users coming from module systems.
You should be able to access the jungle machines, otherwise [request
access](/access).
## Changes from other HPC machines
Users of other machines have been using the Lmod tool (module load ...) to add
or remove programs from their environment, as well as manually building their
own software for too many years.
While we cannot prevent users from continuing to use this tedious mechanism, we
have designed the jungle machines to be much easier to operate by using the nix
package manager.
### Freedom to install packages
When a user wanted to install a package, it was forced to either do it on its
own directory, or request a system administrator to install it in a shared
directory, so other users can also use that package.
This situation is gone, each user can install any package of software by
themselves, without requiring any other authorization. When two users request
the same package, the same copy will be provided.
A new package will be downloaded if it is available (someone already built it)
or will be built from source on demand.
### No changes over time
All users retain the same versions of the packages they request until they
decide to update them.
## Using nix to manage packages
In this chapter we show how to install packages and enter a development shell to
build new programs from source. The examples are done from the hut machine,
read [this page](/access) to request access.
### Installing binaries
To temporarily install new packages, use:
```text
hut% nix shell jungle#gcc jungle#cowsay jungle#ovni
```
Notice that the packages are described as two parts divided by the `#` symbol.
The first part defines where to take the package from and the second part is
the name of the package. For now we will use `jungle#<package>`. You can find
many more packages here:
<https://search.nixos.org/packages>
You will now enter a new shell, where those requested package **binaries are
available in $PATH**:
```text
hut% cowsay hello world
_____________
< hello world >
-------------
\ ^__^
\ (oo)\_______
(__)\ )\/\
||----w |
|| ||
hut% ovniver
LD_LIBRARY_PATH not set
libovni: build v1.11.0 (a7103f8), dynamic v1.11.0 (a7103f8)
hut% gcc --version
gcc (GCC) 13.3.0
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
```
### Building programs
The above method only loads new binaries in the `$PATH`. If we try to build a
program that includes headers or links with a library, it will fail to find
them:
```text
hut$ cat test.c
#include <ovni.h>
int main()
{
ovni_version_check();
return 0;
}
hut% gcc test.c -lovni -o test
test.c:1:10: fatal error: ovni.h: No such file or directory
1 | #include <ovni.h>
| ^~~~~~~~
compilation terminated.
```
We could manually add the full path to the ovni include directory with `-I` and
the libraries with `-L`, but there is a tool that already perform these steps
automatically for us, `nix develop`.
Let's go back to our original shell first, where those packages are not
available anymore:
```
hut% ps
PID TTY TIME CMD
2356260 pts/1 00:00:01 zsh
2457268 pts/1 00:00:00 zsh
2457297 pts/1 00:00:00 ps
hut% exit
hut% ovniver
ovniver: command not found
```
### Creating a flake.nix
To define which packages we want, we will write a small file that list them, a
flake.nix file.
First, we will create a new directory where we are going to be working:
```
hut% mkdir example
hut% cd exmple
```
Then place this flake.nix file:
```nix
{
inputs.jungle.url = "jungle";
outputs = { self, jungle }:
let
pkgs = jungle.outputs.packages.x86_64-linux;
in {
devShells.x86_64-linux.default = pkgs.mkShell {
pname = "devshell";
buildInputs = with pkgs; [
ovni gcc cowsay # more packages here...
];
};
};
}
```
Now enter the shell with:
```
hut% nix develop
warning: creating lock file '/home/Computational/rarias/example/flake.lock':
• Added input 'jungle':
'path:/nix/store/27srv8haj6vv4ywrbmw0a8vds561m8rq-source?lastModified=1739479441&narHash=sha256-Kgjs8SO1w9NbPBu8ghwzCxYJ9kvWpoQOT%2BXwPvA9DcU%3D&rev=76396c0d67ef0cf32377d5c1894bb695293bca9d' (2025-02-13)
• Added input 'jungle/agenix':
'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41?narHash=sha256-b%2Buqzj%2BWa6xgMS9aNbX4I%2BsXeb5biPDi39VgvSFqFvU%3D' (2024-08-10)
• Added input 'jungle/agenix/darwin':
'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d?narHash=sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0%3D' (2023-11-24)
• Added input 'jungle/agenix/darwin/nixpkgs':
follows 'jungle/agenix/nixpkgs'
• Added input 'jungle/agenix/home-manager':
'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1?narHash=sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE%3D' (2023-12-20)
• Added input 'jungle/agenix/home-manager/nixpkgs':
follows 'jungle/agenix/nixpkgs'
• Added input 'jungle/agenix/nixpkgs':
follows 'jungle/nixpkgs'
• Added input 'jungle/agenix/systems':
'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e?narHash=sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768%3D' (2023-04-09)
• Added input 'jungle/bscpkgs':
'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29)
• Added input 'jungle/bscpkgs/nixpkgs':
follows 'jungle/nixpkgs'
• Added input 'jungle/nixpkgs':
'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc?narHash=sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8%3D' (2025-01-14)
hut$
```
Notice that long list of messages is Nix creating a new flake.lock file with the
current state of the packages. Next invocations will use the same packages as
described by the lock file.
### Building a program from nix develop
Now let's try again building our test program:
```text
hut$ cat test.c
#include <ovni.h>
int main()
{
ovni_version_check();
return 0;
}
hut$ gcc test.c -o test -lovni
hut$ ldd test
linux-vdso.so.1 (0x00007ffff7fc4000)
libovni.so.1 => /nix/store/sqk972akjv0q8dchn8ccjln2llzyyfd0-ovni-1.11.0/lib/libovni.so.1 (0x00007ffff7fab000)
libc.so.6 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/libc.so.6 (0x00007ffff7db2000)
/nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/ld-linux-x86-64.so.2 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib64/ld-linux-x86-64.so.2 (0x00007ffff7fc6000)
hut$ ./test
```
Now the ovni.h header and the libovni library are found and the program is
successfully built, linked and executed.
You can add more packages as needed in your flake.nix:
```nix
buildInputs = with pkgs; [
ovni gcc cowsay # more packages here...
];
```
Make sure you exit the develop shell first, and then enter again with `nix
develop`.
## Remember
- `nix shell` places binaries in the `$PATH`.
- `nix develop` enters a development shell where both binaries and the libraries
and includes are available so you can build new programs.

Some files were not shown because too many files have changed in this diff Show More