Compare commits
332 Commits
Author | SHA1 | Date | |
---|---|---|---|
49e074965c | |||
0c6fdc9183 | |||
e813c811ac | |||
da20987f99 | |||
650be88015 | |||
b802a59868 | |||
7247f7e665 | |||
1d555871a5 | |||
a2535c996d | |||
37e60afb54 | |||
3fe138a418 | |||
4e7a9f7ce4 | |||
a6a1af673a | |||
2a3a7b2fb2 | |||
b4ab1c836a | |||
fb8b4defa7 | |||
1bcfbf8cd6 | |||
9f43a0e13b | |||
3a3c3050ef | |||
4419f68948 | |||
e51fc9ffa5 | |||
2ae9e9b635 | |||
be77f6a5f5 | |||
6316a12a67 | |||
db663913d8 | |||
b4846b0f6c | |||
64a52801ed | |||
7a2f37aaa2 | |||
aae6585f66 | |||
1c15e77c83 | |||
82fc3209de | |||
abeab18270 | |||
1985b58619 | |||
44bd061823 | |||
e8c309f584 | |||
71ae7fb585 | |||
8834d561d2 | |||
29daa3c364 | |||
9c503fbefb | |||
51b6a8b612 | |||
52213d388d | |||
edf744db8d | |||
b82894eaec | |||
1c47199891 | |||
8738bd4eeb | |||
7699783aac | |||
fee1d4da7e | |||
b77ce7fb56 | |||
b4a12625c5 | |||
302106ea9a | |||
96877de8d9 | |||
8878985be6 | |||
737578db34 | |||
88555e3f8c | |||
feb2060be7 | |||
00999434c2 | |||
29d58cc62d | |||
587caf262e | |||
2730404ca5 | |||
84db5e6fd6 | |||
f4f34a3159 | |||
91b8b4a3c5 | |||
6cad205269 | |||
c57bf76969 | |||
ad4b615211 | |||
b4518b59cf | |||
45dc4124a3 | |||
bdfe9a48fd | |||
1b337d31f8 | |||
717cd5a21e | |||
def5955614 | |||
0e3c975cb5 | |||
93189a575e | |||
36592c44eb | |||
a34e3752a2 | |||
0d2dea94fb | |||
7f539d7e06 | |||
f8ec090836 | |||
9a9161fc55 | |||
1a0cf96fc4 | |||
4bd1648074 | |||
15b114ffd6 | |||
dd6d8c9735 | |||
e15a3867d4 | |||
5cad208de6 | |||
c8687f7e45 | |||
d988ef2eff | |||
b07929eab3 | |||
b3e397eb4c | |||
5ad2c683ed | |||
1f06f0fa0c | |||
8ca1d84844 | |||
998f599be3 | |||
fcfc6ac149 | |||
6e87130166 | |||
06f9e6ac6b | |||
da07aedce2 | |||
61427a8bf9 | |||
958ad1f025 | |||
1c5f3a856f | |||
4e2b80defd | |||
1c8efd0877 | |||
4c5e85031b | |||
5688823fcc | |||
72faf8365b | |||
0e22d6def8 | |||
22cc1d33f7 | |||
15085c8a05 | |||
06748dac1d | |||
63851306ac | |||
2bdc793c8c | |||
85d1c5e34c | |||
e6b7af5272 | |||
c0ae8770bc | |||
5b51e8947f | |||
db2c6f7e45 | |||
8e8f9e7adb | |||
d2adc3a6d3 | |||
76cd9ea47f | |||
2f851bc216 | |||
834d3187e5 | |||
49be0f208c | |||
fb23b41dae | |||
005a67deaf | |||
f8097cb5cb | |||
ff792f5f48 | |||
5c48b43ae0 | |||
b299ead00b | |||
a92432cf5a | |||
82f5d828c2 | |||
35a94a9b02 | |||
b6bd31e159 | |||
1d4badda5b | |||
bd5214a3b9 | |||
c32f6dea97 | |||
dd341902fc | |||
190e273112 | |||
268807d1d0 | |||
2953080fb8 | |||
9871517be2 | |||
736eacaac5 | |||
0e66aad099 | |||
67a4905a0a | |||
d52d22e0db | |||
42920c2521 | |||
4acd35e036 | |||
621d20db3a | |||
0926f6ec1f | |||
61646cb3bd | |||
c0066c4744 | |||
ffd0593f51 | |||
f49ae0773e | |||
8fa3fccecb | |||
9ee7111453 | |||
8de3d2b149 | |||
bc62e28ca3 | |||
d612a5453c | |||
653d411b9e | |||
51c57dbc41 | |||
33cd40160e | |||
a1e8cfea47 | |||
5d72ee3da3 | |||
fdc6445d47 | |||
e88805947e | |||
aaefddc44a | |||
d9d249411d | |||
c07f75c6bb | |||
8d449ba20c | |||
10ca572aec | |||
75b0f48715 | |||
19a451db77 | |||
ec9be9bb62 | |||
7ddd1977f3 | |||
7050c505b5 | |||
033a1fe97b | |||
77cb3c494e | |||
6db5772ac4 | |||
3e347e673c | |||
dca274d020 | |||
c33909f32f | |||
64e856e8b9 | |||
02f40a8217 | |||
77d43b6da9 | |||
ab55aac5ff | |||
9b5bfbb7a3 | |||
a69a71d1b0 | |||
98374bd303 | |||
3b6be8a2fc | |||
2bb366b9ac | |||
2d16709648 | |||
9344daa31c | |||
80c98041b5 | |||
3418e57907 | |||
6848b58e39 | |||
13a70411aa | |||
f9c77b433a | |||
9d487845f6 | |||
3c99c2a662 | |||
7d09108c9f | |||
0f0a861896 | |||
beb0d5940e | |||
70321ce237 | |||
5bd1d67333 | |||
fad9df61e1 | |||
d2a80c8c18 | |||
599613d139 | |||
ac4fa9abd4 | |||
cb3a7b19f7 | |||
f5d6bf627b | |||
f1ce815edd | |||
a2075cfd65 | |||
8f1f6f92a8 | |||
3416416864 | |||
815888fb07 | |||
029d9cb1db | |||
95fa67ede1 | |||
a19347161f | |||
58c1cc1f7c | |||
b06399dc70 | |||
077eece6b9 | |||
b3ef53de51 | |||
e0852ee89b | |||
dfffc0bdce | |||
8257c245b1 | |||
cd5853cf53 | |||
b677b827d4 | |||
b1d5185cca | |||
a7e66e2246 | |||
480c97e952 | |||
f8fb5fa4ff | |||
acf9b71f04 | |||
bf692e6e4e | |||
c242b65e47 | |||
55d6c17776 | |||
14b173f67e | |||
b9001cdf7d | |||
f892d43b47 | |||
d9e9ee6e3a | |||
79adbe76a8 | |||
66fb848ba8 | |||
40b1a8f0df | |||
a0b9d10b14 | |||
4c309dea2f | |||
b3a397eee4 | |||
7c1fe1455b | |||
2d4b178895 | |||
4dd25f2f89 | |||
6dcd9d8144 | |||
31be81d2b1 | |||
826cfdf43f | |||
a1f258c5ce | |||
1c1d3f3231 | |||
623d46c03f | |||
518a4d6af3 | |||
60077948d6 | |||
c76bfa7f86 | |||
6c10933e80 | |||
6402605b1f | |||
1724535495 | |||
5b41670f36 | |||
ab04855382 | |||
684d5e41c5 | |||
316ea18e24 | |||
c916157fcc | |||
4e9409db10 | |||
94320d9256 | |||
9f5941c2be | |||
fba0f7b739 | |||
2e95281af5 | |||
f4ac9f3186 | |||
f787343f29 | |||
70304d26ff | |||
76c10ec22e | |||
011e8c2bf8 | |||
c1f138a9c1 | |||
1552eeca12 | |||
8769f3d418 | |||
a4c254fcd6 | |||
24fb1846d2 | |||
5e77d0b86c | |||
494fda126c | |||
5cfa2f9611 | |||
9539a24bdb | |||
98c4d924dd | |||
7aae967c65 | |||
49f7edddac | |||
2f055d9fc5 | |||
108abffd2a | |||
4c19ad66e3 | |||
19c01aeb1d | |||
fc90b40310 | |||
81de0effb1 | |||
5ce93ff85a | |||
c020b9f5d6 | |||
f47734b524 | |||
ca3a7d98f5 | |||
0d5609ecc2 | |||
818edccb34 | |||
2815f5bcfd | |||
c1bbbd7793 | |||
aa1dd14b62 | |||
399103a9b4 | |||
74639d3ece | |||
613a76ac29 | |||
c3ea8864bb | |||
919f211536 | |||
141d77e2b6 | |||
44fcb97ec7 | |||
543983e9f3 | |||
95bbeeb646 | |||
de2af79810 | |||
b9aff1dba5 | |||
7da979bed2 | |||
cfe37640ea | |||
096e407571 | |||
ae31b546e7 | |||
c3a2766bb7 | |||
b568bb36d4 | |||
55f784e6b7 | |||
dfab84b0ba | |||
8f66ba824a | |||
79bd4398f3 | |||
b44afdaaa1 | |||
9528fab3ef | |||
7e82885d84 | |||
57ed0cf319 | |||
b043ee3b1d | |||
9e3bdaabb6 | |||
77f72ac939 | |||
fa25a68571 | |||
![]() |
ea0f406849 | ||
![]() |
9df6be1b6b |
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
*.swp
|
||||
/result
|
||||
/misc
|
BIN
doc/Intel_Server_Board_S2600WF_TPS_2_6.pdf
Normal file
BIN
doc/Intel_Server_Board_S2600WF_TPS_2_6.pdf
Normal file
Binary file not shown.
BIN
doc/R1000WF_SystemIntegration_and_ServiceGuide_Rev2_4.pdf
Normal file
BIN
doc/R1000WF_SystemIntegration_and_ServiceGuide_Rev2_4.pdf
Normal file
Binary file not shown.
BIN
doc/SEL_TroubleshootingGuide.pdf
Normal file
BIN
doc/SEL_TroubleshootingGuide.pdf
Normal file
Binary file not shown.
BIN
doc/bsc-ssf.pdf
Normal file
BIN
doc/bsc-ssf.pdf
Normal file
Binary file not shown.
176
doc/install.md
Normal file
176
doc/install.md
Normal file
@ -0,0 +1,176 @@
|
||||
# Installing NixOS in a new node
|
||||
|
||||
This article shows the steps to install NixOS in a node following the
|
||||
configuration of the repo.
|
||||
|
||||
## Enable the serial console
|
||||
|
||||
By default, the nodes have the serial console disabled in the GRUB and also boot
|
||||
without the serial enabled.
|
||||
|
||||
To enable the serial console in the GRUB, set in /etc/default/grub the following
|
||||
lines:
|
||||
|
||||
```
|
||||
GRUB_TERMINAL="console serial"
|
||||
GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1"
|
||||
```
|
||||
|
||||
To boot Linux with the serial enabled, so you can see the boot log and login via
|
||||
serial, set:
|
||||
|
||||
```
|
||||
GRUB_CMDLINE_LINUX="console=ttyS0,115200n8 console=tty0"
|
||||
```
|
||||
|
||||
Then update the grub config:
|
||||
|
||||
```
|
||||
# grub2-mkconfig -o /boot/grub2/grub.cfg
|
||||
```
|
||||
|
||||
And reboot.
|
||||
|
||||
## Prepare the disk
|
||||
|
||||
Create a main partition and label it `nixos` following [the manual][1].
|
||||
|
||||
[1]: https://nixos.org/manual/nixos/stable/index.html#sec-installation-manual-partitioning
|
||||
|
||||
```
|
||||
# disk=/dev/sdX
|
||||
# parted $disk -- mklabel msdos
|
||||
# parted $disk -- mkpart primary 1MB -8GB
|
||||
# parted $disk -- mkpart primary linux-swap -8GB 100%
|
||||
# parted $disk -- set 1 boot on
|
||||
```
|
||||
|
||||
Then create an ext4 filesystem, labeled `nixos`, where the system will be
|
||||
installed. **Ensure that no other partition has the same label.**
|
||||
|
||||
```
|
||||
# mkfs.ext4 -L nixos "${disk}1"
|
||||
# mkswap -L swap "${disk}2"
|
||||
# mount ${disk}1 /mnt
|
||||
# lsblk -f $disk
|
||||
NAME FSTYPE LABEL UUID MOUNTPOINT
|
||||
sdX
|
||||
`-sdX1 ext4 nixos 10d73b75-809c-4fa3-b99d-4fab2f0d0d8e /mnt
|
||||
```
|
||||
|
||||
## Prepare nix and nixos-install
|
||||
|
||||
Mount the nix store from the hut node in read-only /nix.
|
||||
|
||||
```
|
||||
# mkdir /nix
|
||||
# mount -o ro hut:/nix /nix
|
||||
```
|
||||
|
||||
Get the nix binary and nixos-install tool from hut:
|
||||
|
||||
```
|
||||
# ssh hut 'readlink -f $(which nix)'
|
||||
/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin/nix
|
||||
# ssh hut 'readlink -f $(which nixos-install)'
|
||||
/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/nixos-install
|
||||
```
|
||||
|
||||
And add them to the PATH:
|
||||
|
||||
```
|
||||
# export PATH=$PATH:/nix/store/0sxbaj71c4c4n43qhdxm31f56gjalksw-nix-2.13.3/bin
|
||||
# export PATH=$PATH:/nix/store/9yq8ps06ysr2pfiwiij39ny56yk3pdcs-nixos-install/bin/
|
||||
# nix --version
|
||||
nix (Nix) 2.13.3
|
||||
```
|
||||
|
||||
## Adapt owl configuration
|
||||
|
||||
Clone owl repo:
|
||||
|
||||
```
|
||||
$ git clone git@bscpm03.bsc.es:rarias/owl.git
|
||||
$ cd owl
|
||||
```
|
||||
|
||||
Edit the configuration to your needs.
|
||||
|
||||
## Install from another Linux OS
|
||||
|
||||
Install NixOS into the storage drive.
|
||||
|
||||
```
|
||||
# nixos-install --flake .#xeon0X --root /mnt
|
||||
```
|
||||
|
||||
At this point, the NixOS grub has been installed into the nixos device, which
|
||||
is not the default boot device. To keep both the old Linux and NixOS grubs, add
|
||||
an entry into the old Linux grub to jump into the new grub.
|
||||
|
||||
```
|
||||
# echo "
|
||||
|
||||
menuentry 'NixOS' {
|
||||
insmod chain
|
||||
search --no-floppy --label nixos --set root
|
||||
configfile /boot/grub/grub.cfg
|
||||
} " >> /etc/grub.d/40_custom
|
||||
```
|
||||
|
||||
Rebuild grub config.
|
||||
|
||||
```
|
||||
# grub2-mkconfig -o /boot/grub/grub.cfg
|
||||
```
|
||||
|
||||
To boot into NixOS manually, reboot and select NixOS in the grub menu to boot
|
||||
into NixOS.
|
||||
|
||||
To temporarily boot into NixOS only on the next reboot run:
|
||||
|
||||
```
|
||||
# grub2-reboot 'NixOS'
|
||||
```
|
||||
|
||||
To permanently boot into NixOS as the default boot OS, edit `/etc/default/grub`:
|
||||
|
||||
```
|
||||
GRUB_DEFAULT='NixOS'
|
||||
```
|
||||
|
||||
And update grub.
|
||||
|
||||
```
|
||||
# grub2-mkconfig -o /boot/grub/grub.cfg
|
||||
```
|
||||
|
||||
## Build the nixos kexec image
|
||||
|
||||
```
|
||||
# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v
|
||||
```
|
||||
|
||||
## Chain NixOS in same disk with other systems
|
||||
|
||||
To install NixOS on a partition alongside another system which controls the GRUB,
|
||||
first disable the grub device, so the GRUB is not installed in the disk by
|
||||
NixOS (only the /boot files will be generated):
|
||||
|
||||
```
|
||||
boot.loader.grub.device = "nodev";
|
||||
```
|
||||
|
||||
Then add the following entry to the old GRUB configuration:
|
||||
|
||||
```
|
||||
menuentry 'NixOS' {
|
||||
insmod chain
|
||||
search --no-floppy --label nixos --set root
|
||||
configfile /boot/grub/grub.cfg
|
||||
}
|
||||
```
|
||||
|
||||
The partition with NixOS must have the label "nixos" for it to be found. New
|
||||
system configuration entries will be stored in the GRUB configuration managed
|
||||
by NixOS, so there is no need to change the old GRUB settings.
|
130
flake.lock
generated
Normal file
130
flake.lock
generated
Normal file
@ -0,0 +1,130 @@
|
||||
{
|
||||
"nodes": {
|
||||
"agenix": {
|
||||
"inputs": {
|
||||
"darwin": "darwin",
|
||||
"home-manager": "home-manager",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
],
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1723293904,
|
||||
"narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=",
|
||||
"owner": "ryantm",
|
||||
"repo": "agenix",
|
||||
"rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "ryantm",
|
||||
"repo": "agenix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"bscpkgs": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1732868163,
|
||||
"narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f",
|
||||
"revCount": 952,
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
},
|
||||
"original": {
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
}
|
||||
},
|
||||
"darwin": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"agenix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1700795494,
|
||||
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
|
||||
"owner": "lnl7",
|
||||
"repo": "nix-darwin",
|
||||
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "lnl7",
|
||||
"ref": "master",
|
||||
"repo": "nix-darwin",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"home-manager": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"agenix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1703113217,
|
||||
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1736867362,
|
||||
"narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-24.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"agenix": "agenix",
|
||||
"bscpkgs": "bscpkgs",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
37
flake.nix
Normal file
37
flake.nix
Normal file
@ -0,0 +1,37 @@
|
||||
{
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
|
||||
agenix.url = "github:ryantm/agenix";
|
||||
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
||||
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
||||
bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
|
||||
let
|
||||
mkConf = name: nixpkgs.lib.nixosSystem {
|
||||
system = "x86_64-linux";
|
||||
specialArgs = { inherit nixpkgs bscpkgs agenix; theFlake = self; };
|
||||
modules = [ "${self.outPath}/m/${name}/configuration.nix" ];
|
||||
};
|
||||
in
|
||||
{
|
||||
nixosConfigurations = {
|
||||
hut = mkConf "hut";
|
||||
tent = mkConf "tent";
|
||||
owl1 = mkConf "owl1";
|
||||
owl2 = mkConf "owl2";
|
||||
eudy = mkConf "eudy";
|
||||
koro = mkConf "koro";
|
||||
bay = mkConf "bay";
|
||||
lake2 = mkConf "lake2";
|
||||
raccoon = mkConf "raccoon";
|
||||
fox = mkConf "fox";
|
||||
};
|
||||
|
||||
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
|
||||
bscpkgs = bscpkgs.packages.x86_64-linux;
|
||||
nixpkgs = nixpkgs.legacyPackages.x86_64-linux;
|
||||
};
|
||||
};
|
||||
}
|
31
keys.nix
Normal file
31
keys.nix
Normal file
@ -0,0 +1,31 @@
|
||||
# As agenix needs to parse the secrets from a standalone .nix file, we describe
|
||||
# here all the public keys
|
||||
rec {
|
||||
hosts = {
|
||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
|
||||
};
|
||||
|
||||
hostGroup = with hosts; rec {
|
||||
untrusted = [ fox ];
|
||||
compute = [ owl1 owl2 ];
|
||||
playground = [ eudy koro ];
|
||||
storage = [ bay lake2 ];
|
||||
monitor = [ hut ];
|
||||
|
||||
system = storage ++ monitor;
|
||||
safe = system ++ compute;
|
||||
all = safe ++ playground;
|
||||
};
|
||||
|
||||
admins = {
|
||||
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
||||
};
|
||||
}
|
107
m/bay/configuration.nix
Normal file
107
m/bay/configuration.nix
Normal file
@ -0,0 +1,107 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../module/monitoring.nix
|
||||
];
|
||||
|
||||
# Select the disk using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d";
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"kernel.yama.ptrace_scope" = lib.mkForce "1";
|
||||
};
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
ceph
|
||||
];
|
||||
|
||||
networking = {
|
||||
hostName = "bay";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.40";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.40";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Accept all incoming TCP traffic from lake2
|
||||
iptables -A nixos-fw -p tcp -s lake2 -j nixos-fw-accept
|
||||
# Accept monitoring requests from hut
|
||||
iptables -A nixos-fw -p tcp -s hut -m multiport --dport 9283,9002 -j nixos-fw-accept
|
||||
# Accept all Ceph traffic from the local network
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
services.ceph = {
|
||||
enable = true;
|
||||
global = {
|
||||
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
||||
monHost = "10.0.40.40";
|
||||
monInitialMembers = "bay";
|
||||
clusterNetwork = "10.0.40.40/24"; # Use Ethernet only
|
||||
};
|
||||
extraConfig = {
|
||||
# Only log to stderr so it appears in the journal
|
||||
"log_file" = "/dev/null";
|
||||
"mon_cluster_log_file" = "/dev/null";
|
||||
"log_to_stderr" = "true";
|
||||
"err_to_stderr" = "true";
|
||||
"log_to_file" = "false";
|
||||
};
|
||||
mds = {
|
||||
enable = true;
|
||||
daemons = [ "mds0" "mds1" ];
|
||||
extraConfig = {
|
||||
"host" = "bay";
|
||||
};
|
||||
};
|
||||
mgr = {
|
||||
enable = true;
|
||||
daemons = [ "bay" ];
|
||||
};
|
||||
mon = {
|
||||
enable = true;
|
||||
daemons = [ "bay" ];
|
||||
};
|
||||
osd = {
|
||||
enable = true;
|
||||
# One daemon per NVME disk
|
||||
daemons = [ "0" "1" "2" "3" ];
|
||||
extraConfig = {
|
||||
"osd crush chooseleaf type" = "0";
|
||||
"osd journal size" = "10000";
|
||||
"osd pool default min size" = "2";
|
||||
"osd pool default pg num" = "200";
|
||||
"osd pool default pgp num" = "200";
|
||||
"osd pool default size" = "3";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Missing service for volumes, see:
|
||||
# https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/
|
||||
systemd.services.ceph-volume = {
|
||||
enable = true;
|
||||
description = "Ceph Volume activation";
|
||||
unitConfig = {
|
||||
Type = "oneshot";
|
||||
After = "local-fs.target";
|
||||
Wants = "local-fs.target";
|
||||
};
|
||||
path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ];
|
||||
serviceConfig = {
|
||||
KillMode = "none";
|
||||
Environment = "CEPH_VOLUME_TIMEOUT=10000";
|
||||
ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'";
|
||||
TimeoutSec = "0";
|
||||
};
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
};
|
||||
}
|
20
m/common/base.nix
Normal file
20
m/common/base.nix
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
# All machines should include this profile.
|
||||
# Includes the basic configuration for an Intel server.
|
||||
imports = [
|
||||
./base/agenix.nix
|
||||
./base/august-shutdown.nix
|
||||
./base/boot.nix
|
||||
./base/env.nix
|
||||
./base/fs.nix
|
||||
./base/hw.nix
|
||||
./base/net.nix
|
||||
./base/nix.nix
|
||||
./base/ntp.nix
|
||||
./base/rev.nix
|
||||
./base/ssh.nix
|
||||
./base/users.nix
|
||||
./base/watchdog.nix
|
||||
./base/zsh.nix
|
||||
];
|
||||
}
|
9
m/common/base/agenix.nix
Normal file
9
m/common/base/agenix.nix
Normal file
@ -0,0 +1,9 @@
|
||||
{ agenix, ... }:
|
||||
|
||||
{
|
||||
imports = [ agenix.nixosModules.default ];
|
||||
|
||||
environment.systemPackages = [
|
||||
agenix.packages.x86_64-linux.default
|
||||
];
|
||||
}
|
14
m/common/base/august-shutdown.nix
Normal file
14
m/common/base/august-shutdown.nix
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
|
||||
# hardware from spurious electrical peaks on the yearly electrical cut for
|
||||
# maintenance that starts on August 4th.
|
||||
systemd.timers.august-shutdown = {
|
||||
description = "Shutdown on August 2nd for maintenance";
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "*-08-02 11:00:00";
|
||||
RandomizedDelaySec = "10min";
|
||||
Unit = "systemd-poweroff.service";
|
||||
};
|
||||
};
|
||||
}
|
37
m/common/base/boot.nix
Normal file
37
m/common/base/boot.nix
Normal file
@ -0,0 +1,37 @@
|
||||
{ lib, pkgs, ... }:
|
||||
|
||||
{
|
||||
# Use the GRUB 2 boot loader.
|
||||
boot.loader.grub.enable = true;
|
||||
|
||||
# Enable GRUB2 serial console
|
||||
boot.loader.grub.extraConfig = ''
|
||||
serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1
|
||||
terminal_input --append serial
|
||||
terminal_output --append serial
|
||||
'';
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"kernel.perf_event_paranoid" = lib.mkDefault "-1";
|
||||
|
||||
# Allow ptracing (i.e. attach with GDB) any process of the same user, see:
|
||||
# https://www.kernel.org/doc/Documentation/security/Yama.txt
|
||||
"kernel.yama.ptrace_scope" = "0";
|
||||
};
|
||||
|
||||
boot.kernelPackages = pkgs.linuxPackages_latest;
|
||||
|
||||
#boot.kernelPatches = lib.singleton {
|
||||
# name = "osnoise-tracer";
|
||||
# patch = null;
|
||||
# extraStructuredConfig = with lib.kernel; {
|
||||
# OSNOISE_TRACER = yes;
|
||||
# HWLAT_TRACER = yes;
|
||||
# };
|
||||
#};
|
||||
|
||||
boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "ehci_pci" "nvme" "usbhid" "sd_mod" ];
|
||||
boot.initrd.kernelModules = [ ];
|
||||
boot.kernelModules = [ "kvm-intel" ];
|
||||
boot.extraModulePackages = [ ];
|
||||
}
|
35
m/common/base/env.nix
Normal file
35
m/common/base/env.nix
Normal file
@ -0,0 +1,35 @@
|
||||
{ pkgs, config, ... }:
|
||||
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
||||
ncdu config.boot.kernelPackages.perf ldns
|
||||
# From bscpkgs overlay
|
||||
osumb
|
||||
];
|
||||
|
||||
programs.direnv.enable = true;
|
||||
|
||||
# Increase limits
|
||||
security.pam.loginLimits = [
|
||||
{
|
||||
domain = "*";
|
||||
type = "-";
|
||||
item = "memlock";
|
||||
value = "1048576"; # 1 GiB of mem locked
|
||||
}
|
||||
];
|
||||
|
||||
environment.variables = {
|
||||
EDITOR = "vim";
|
||||
VISUAL = "vim";
|
||||
};
|
||||
|
||||
programs.bash.promptInit = ''
|
||||
PS1="\h\\$ "
|
||||
'';
|
||||
|
||||
time.timeZone = "Europe/Madrid";
|
||||
i18n.defaultLocale = "en_DK.UTF-8";
|
||||
}
|
24
m/common/base/fs.nix
Normal file
24
m/common/base/fs.nix
Normal file
@ -0,0 +1,24 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
fileSystems."/" =
|
||||
{ device = "/dev/disk/by-label/nixos";
|
||||
fsType = "ext4";
|
||||
};
|
||||
|
||||
# Trim unused blocks weekly
|
||||
services.fstrim.enable = true;
|
||||
|
||||
swapDevices =
|
||||
[ { device = "/dev/disk/by-label/swap"; }
|
||||
];
|
||||
|
||||
# Tracing
|
||||
fileSystems."/sys/kernel/tracing" = {
|
||||
device = "none";
|
||||
fsType = "tracefs";
|
||||
};
|
||||
|
||||
# Mount a tmpfs into /tmp
|
||||
boot.tmp.useTmpfs = true;
|
||||
}
|
14
m/common/base/hw.nix
Normal file
14
m/common/base/hw.nix
Normal file
@ -0,0 +1,14 @@
|
||||
# Do not modify this file! It was generated by ‘nixos-generate-config’
|
||||
# and may be overwritten by future invocations. Please make changes
|
||||
# to /etc/nixos/configuration.nix instead.
|
||||
{ config, lib, pkgs, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[ (modulesPath + "/installer/scan/not-detected.nix")
|
||||
];
|
||||
|
||||
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
|
||||
powerManagement.cpuFreqGovernor = lib.mkDefault "powersave";
|
||||
hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
}
|
19
m/common/base/net.nix
Normal file
19
m/common/base/net.nix
Normal file
@ -0,0 +1,19 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
networking = {
|
||||
enableIPv6 = false;
|
||||
useDHCP = false;
|
||||
|
||||
firewall = {
|
||||
enable = true;
|
||||
allowedTCPPorts = [ 22 ];
|
||||
};
|
||||
|
||||
hosts = {
|
||||
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
||||
"84.88.51.152" = [ "raccoon" ];
|
||||
"84.88.51.142" = [ "raccoon-ipmi" ];
|
||||
};
|
||||
};
|
||||
}
|
58
m/common/base/nix.nix
Normal file
58
m/common/base/nix.nix
Normal file
@ -0,0 +1,58 @@
|
||||
{ pkgs, nixpkgs, bscpkgs, theFlake, ... }:
|
||||
|
||||
{
|
||||
nixpkgs.overlays = [
|
||||
bscpkgs.bscOverlay
|
||||
(import ../../../pkgs/overlay.nix)
|
||||
];
|
||||
|
||||
nix = {
|
||||
nixPath = [
|
||||
"nixpkgs=${nixpkgs}"
|
||||
"jungle=${theFlake.outPath}"
|
||||
];
|
||||
|
||||
registry = {
|
||||
nixpkgs.flake = nixpkgs;
|
||||
jungle.flake = theFlake;
|
||||
};
|
||||
|
||||
settings = {
|
||||
experimental-features = [ "nix-command" "flakes" ];
|
||||
sandbox = "relaxed";
|
||||
trusted-users = [ "@wheel" ];
|
||||
flake-registry = pkgs.writeText "global-registry.json"
|
||||
''{"flakes":[],"version":2}'';
|
||||
keep-outputs = true;
|
||||
};
|
||||
|
||||
gc = {
|
||||
automatic = true;
|
||||
dates = "weekly";
|
||||
options = "--delete-older-than 30d";
|
||||
};
|
||||
};
|
||||
|
||||
# The nix-gc.service can begin its execution *before* /home is mounted,
|
||||
# causing it to remove all gcroots considering them as stale, as it cannot
|
||||
# access the symlink. To prevent this problem, we force the service to wait
|
||||
# until /home is mounted as well as other remote FS like /ceph.
|
||||
systemd.services.nix-gc = {
|
||||
# Start remote-fs.target if not already being started and fail if it fails
|
||||
# to start. It will also be stopped if the remote-fs.target fails after
|
||||
# starting successfully.
|
||||
bindsTo = [ "remote-fs.target" ];
|
||||
# Wait until remote-fs.target fully starts before starting this one.
|
||||
after = [ "remote-fs.target"];
|
||||
# Ensure we can access a remote path inside /home
|
||||
unitConfig.ConditionPathExists = "/home/Computational";
|
||||
};
|
||||
|
||||
# This value determines the NixOS release from which the default
|
||||
# settings for stateful data, like file locations and database versions
|
||||
# on your system were taken. It's perfectly fine and recommended to leave
|
||||
# this value at the release version of the first install of this system.
|
||||
# Before changing this value read the documentation for this option
|
||||
# (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
|
||||
system.stateVersion = "22.11"; # Did you read the comment?
|
||||
}
|
9
m/common/base/ntp.nix
Normal file
9
m/common/base/ntp.nix
Normal file
@ -0,0 +1,9 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
services.ntp.enable = true;
|
||||
|
||||
# Use the NTP server at BSC, as we don't have direct access
|
||||
# to the outside world
|
||||
networking.timeServers = [ "84.88.52.36" ];
|
||||
}
|
21
m/common/base/rev.nix
Normal file
21
m/common/base/rev.nix
Normal file
@ -0,0 +1,21 @@
|
||||
{ theFlake, ... }:
|
||||
|
||||
let
|
||||
# Prevent building a configuration without revision
|
||||
rev = if theFlake ? rev then theFlake.rev
|
||||
else throw ("Refusing to build from a dirty Git tree!");
|
||||
in {
|
||||
# Save the commit of the config in /etc/configrev
|
||||
environment.etc.configrev.text = rev + "\n";
|
||||
|
||||
# Keep a log with the config over time
|
||||
system.activationScripts.configRevLog.text = ''
|
||||
BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown)
|
||||
CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown)
|
||||
NEXT=${rev}
|
||||
DATENOW=$(date --iso-8601=seconds)
|
||||
echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log
|
||||
'';
|
||||
|
||||
system.configurationRevision = rev;
|
||||
}
|
18
m/common/base/ssh.nix
Normal file
18
m/common/base/ssh.nix
Normal file
@ -0,0 +1,18 @@
|
||||
{ lib, ... }:
|
||||
|
||||
let
|
||||
keys = import ../../../keys.nix;
|
||||
hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts;
|
||||
in
|
||||
{
|
||||
# Enable the OpenSSH daemon.
|
||||
services.openssh.enable = true;
|
||||
|
||||
programs.ssh.knownHosts = hostsKeys // {
|
||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
|
||||
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
};
|
||||
}
|
150
m/common/base/users.nix
Normal file
150
m/common/base/users.nix
Normal file
@ -0,0 +1,150 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../../module/jungle-users.nix
|
||||
];
|
||||
|
||||
users = {
|
||||
mutableUsers = false;
|
||||
users = {
|
||||
# Generate hashedPassword with `mkpasswd -m sha-512`
|
||||
|
||||
root.openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"
|
||||
];
|
||||
|
||||
rarias = {
|
||||
uid = 1880;
|
||||
isNormalUser = true;
|
||||
linger = true;
|
||||
home = "/home/Computational/rarias";
|
||||
description = "Rodrigo Arias";
|
||||
group = "Computational";
|
||||
extraGroups = [ "wheel" ];
|
||||
hashedPassword = "$6$u06tkCy13enReBsb$xiI.twRvvTfH4jdS3s68NZ7U9PSbGKs5.LXU/UgoawSwNWhZo2hRAjNL5qG0/lAckzcho2LjD0r3NfVPvthY6/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKBOf4r4lzQfyO0bx5BaREePREw8Zw5+xYgZhXwOZoBO ram@hop"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal"
|
||||
];
|
||||
shell = pkgs.zsh;
|
||||
};
|
||||
|
||||
arocanon = {
|
||||
uid = 1042;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/arocanon";
|
||||
description = "Aleix Roca";
|
||||
group = "Computational";
|
||||
extraGroups = [ "wheel" "tracing" ];
|
||||
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
jungleUsers = {
|
||||
rpenacob = {
|
||||
uid = 2761;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/rpenacob";
|
||||
description = "Raúl Peñacoba";
|
||||
group = "Computational";
|
||||
hosts = [ "owl1" "owl2" "hut" "tent" ];
|
||||
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
||||
];
|
||||
};
|
||||
|
||||
anavarro = {
|
||||
uid = 1037;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/anavarro";
|
||||
description = "Antoni Navarro";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "tent" "raccoon" "fox" ];
|
||||
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
||||
];
|
||||
};
|
||||
|
||||
abonerib = {
|
||||
uid = 4541;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/abonerib";
|
||||
description = "Aleix Boné";
|
||||
group = "Computational";
|
||||
hosts = [ "owl1" "owl2" "hut" "tent" "raccoon" "fox" ];
|
||||
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
||||
];
|
||||
};
|
||||
|
||||
vlopez = {
|
||||
uid = 4334;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/vlopez";
|
||||
description = "Victor López";
|
||||
group = "Computational";
|
||||
hosts = [ "koro" ];
|
||||
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
||||
];
|
||||
};
|
||||
|
||||
dbautist = {
|
||||
uid = 5649;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dbautist";
|
||||
description = "Dylan Bautista Cases";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "tent" "raccoon" ];
|
||||
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
||||
];
|
||||
};
|
||||
|
||||
dalvare1 = {
|
||||
uid = 2758;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dalvare1";
|
||||
description = "David Álvarez";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "tent" "fox" ];
|
||||
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
|
||||
];
|
||||
};
|
||||
|
||||
varcila = {
|
||||
uid = 5650;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/varcila";
|
||||
description = "Vincent Arcila";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "tent" "fox" ];
|
||||
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
groups = {
|
||||
Computational = { gid = 564; };
|
||||
tracing = { };
|
||||
};
|
||||
};
|
||||
}
|
9
m/common/base/watchdog.nix
Normal file
9
m/common/base/watchdog.nix
Normal file
@ -0,0 +1,9 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
# The boards have a BMC watchdog controlled by IPMI
|
||||
boot.kernelModules = [ "ipmi_watchdog" ];
|
||||
|
||||
# Enable systemd watchdog with 30 s interval
|
||||
systemd.watchdog.runtimeTime = "30s";
|
||||
}
|
91
m/common/base/zsh.nix
Normal file
91
m/common/base/zsh.nix
Normal file
@ -0,0 +1,91 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
zsh-completions
|
||||
nix-zsh-completions
|
||||
];
|
||||
|
||||
programs.zsh = {
|
||||
enable = true;
|
||||
histSize = 1000000;
|
||||
|
||||
shellInit = ''
|
||||
# Disable new user prompt
|
||||
if [ ! -e ~/.zshrc ]; then
|
||||
touch ~/.zshrc
|
||||
fi
|
||||
'';
|
||||
|
||||
promptInit = ''
|
||||
# Note that to manually override this in ~/.zshrc you should run `prompt off`
|
||||
# before setting your PS1 and etc. Otherwise this will likely to interact with
|
||||
# your ~/.zshrc configuration in unexpected ways as the default prompt sets
|
||||
# a lot of different prompt variables.
|
||||
autoload -U promptinit && promptinit && prompt default && setopt prompt_sp
|
||||
'';
|
||||
|
||||
# Taken from Ulli Kehrle config:
|
||||
# https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205
|
||||
interactiveShellInit = ''
|
||||
source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh"
|
||||
|
||||
# Save history immediately, but only load it when the shell starts
|
||||
setopt inc_append_history
|
||||
|
||||
# dircolors doesn't support alacritty:
|
||||
# https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html
|
||||
export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:';
|
||||
|
||||
# From Arch Linux and GRML
|
||||
bindkey "^R" history-incremental-pattern-search-backward
|
||||
bindkey "^S" history-incremental-pattern-search-forward
|
||||
|
||||
# Auto rehash for new binaries
|
||||
zstyle ':completion:*' rehash true
|
||||
# show a nice menu with the matches
|
||||
zstyle ':completion:*' menu yes select
|
||||
|
||||
bindkey '^[OA' history-substring-search-up # Up
|
||||
bindkey '^[[A' history-substring-search-up # Up
|
||||
|
||||
bindkey '^[OB' history-substring-search-down # Down
|
||||
bindkey '^[[B' history-substring-search-down # Down
|
||||
|
||||
bindkey '\e[1~' beginning-of-line # Home
|
||||
bindkey '\e[7~' beginning-of-line # Home
|
||||
bindkey '\e[H' beginning-of-line # Home
|
||||
bindkey '\eOH' beginning-of-line # Home
|
||||
|
||||
bindkey '\e[4~' end-of-line # End
|
||||
bindkey '\e[8~' end-of-line # End
|
||||
bindkey '\e[F' end-of-line # End
|
||||
bindkey '\eOF' end-of-line # End
|
||||
|
||||
bindkey '^?' backward-delete-char # Backspace
|
||||
bindkey '\e[3~' delete-char # Del
|
||||
# bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del
|
||||
bindkey '\e[2~' overwrite-mode # Ins
|
||||
|
||||
bindkey '^H' backward-kill-word # C-Backspace
|
||||
|
||||
bindkey '5~' kill-word # C-Del
|
||||
bindkey '^[[3;5~' kill-word # C-Del
|
||||
bindkey '^[[3^' kill-word # C-Del
|
||||
|
||||
bindkey "^[[1;5H" backward-kill-line # C-Home
|
||||
bindkey "^[[7^" backward-kill-line # C-Home
|
||||
|
||||
bindkey "^[[1;5F" kill-line # C-End
|
||||
bindkey "^[[8^" kill-line # C-End
|
||||
|
||||
bindkey '^[[1;5C' forward-word # C-Right
|
||||
bindkey '^[0c' forward-word # C-Right
|
||||
bindkey '^[[5C' forward-word # C-Right
|
||||
|
||||
bindkey '^[[1;5D' backward-word # C-Left
|
||||
bindkey '^[0d' backward-word # C-Left
|
||||
bindkey '^[[5D' backward-word # C-Left
|
||||
'';
|
||||
};
|
||||
}
|
9
m/common/ssf.nix
Normal file
9
m/common/ssf.nix
Normal file
@ -0,0 +1,9 @@
|
||||
{
|
||||
# Provides the base system for a xeon node in the SSF rack.
|
||||
imports = [
|
||||
./xeon.nix
|
||||
./ssf/fs.nix
|
||||
./ssf/net.nix
|
||||
./ssf/ssh.nix
|
||||
];
|
||||
}
|
@ -1,5 +1,3 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
# Mount the home via NFS
|
||||
fileSystems."/home" = {
|
||||
@ -7,10 +5,4 @@
|
||||
fsType = "nfs";
|
||||
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||
};
|
||||
|
||||
# Tracing
|
||||
fileSystems."/sys/kernel/tracing" = {
|
||||
device = "none";
|
||||
fsType = "tracefs";
|
||||
};
|
||||
}
|
90
m/common/ssf/net.nix
Normal file
90
m/common/ssf/net.nix
Normal file
@ -0,0 +1,90 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
# Infiniband (IPoIB)
|
||||
environment.systemPackages = [ pkgs.rdma-core ];
|
||||
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
||||
|
||||
networking = {
|
||||
defaultGateway = "10.0.40.30";
|
||||
nameservers = ["8.8.8.8"];
|
||||
|
||||
proxy = {
|
||||
default = "http://hut:23080/";
|
||||
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut";
|
||||
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
|
||||
# https://github.com/golang/go/issues/16715
|
||||
allProxy = null;
|
||||
};
|
||||
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Prevent ssfhead from contacting our slurmd daemon
|
||||
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
||||
# But accept traffic to slurm ports from any other node in the subnet
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
||||
# We also need to open the srun port range
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
|
||||
extraHosts = ''
|
||||
10.0.40.30 ssfhead
|
||||
|
||||
# Node Entry for node: mds01 (ID=72)
|
||||
10.0.40.40 bay mds01 mds01-eth0
|
||||
10.0.42.40 bay-ib mds01-ib0
|
||||
10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi
|
||||
|
||||
# Node Entry for node: oss01 (ID=73)
|
||||
10.0.40.41 oss01 oss01-eth0
|
||||
10.0.42.41 oss01-ib0
|
||||
10.0.40.142 oss01-ipmi0 oss01-ipmi
|
||||
|
||||
# Node Entry for node: oss02 (ID=74)
|
||||
10.0.40.42 lake2 oss02 oss02-eth0
|
||||
10.0.42.42 lake2-ib oss02-ib0
|
||||
10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi
|
||||
|
||||
# Node Entry for node: xeon01 (ID=15)
|
||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
||||
10.0.42.1 owl1-ib xeon01-ib0
|
||||
10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi
|
||||
|
||||
# Node Entry for node: xeon02 (ID=16)
|
||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
||||
10.0.42.2 owl2-ib xeon02-ib0
|
||||
10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi
|
||||
|
||||
# Node Entry for node: xeon03 (ID=17)
|
||||
10.0.40.3 xeon03 xeon03-eth0
|
||||
10.0.42.3 xeon03-ib0
|
||||
10.0.40.103 xeon03-ipmi0 xeon03-ipmi
|
||||
|
||||
# Node Entry for node: xeon04 (ID=18)
|
||||
10.0.40.4 xeon04 xeon04-eth0
|
||||
10.0.42.4 xeon04-ib0
|
||||
10.0.40.104 xeon04-ipmi0 xeon04-ipmi
|
||||
|
||||
# Node Entry for node: xeon05 (ID=19)
|
||||
10.0.40.5 koro xeon05 xeon05-eth0
|
||||
10.0.42.5 koro-ib xeon05-ib0
|
||||
10.0.40.105 koro-ipmi xeon05-ipmi0 xeon05-ipmi
|
||||
|
||||
# Node Entry for node: xeon06 (ID=20)
|
||||
10.0.40.6 xeon06 xeon06-eth0
|
||||
10.0.42.6 xeon06-ib0
|
||||
10.0.40.106 xeon06-ipmi0 xeon06-ipmi
|
||||
|
||||
# Node Entry for node: xeon07 (ID=21)
|
||||
10.0.40.7 hut xeon07 xeon07-eth0
|
||||
10.0.42.7 hut-ib xeon07-ib0
|
||||
10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi
|
||||
|
||||
# Node Entry for node: xeon08 (ID=22)
|
||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
||||
10.0.42.8 eudy-ib xeon08-ib0
|
||||
10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi
|
||||
'';
|
||||
};
|
||||
}
|
8
m/common/ssf/ssh.nix
Normal file
8
m/common/ssf/ssh.nix
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
# Connect to intranet git hosts via proxy
|
||||
programs.ssh.extraConfig = ''
|
||||
# Connect to BSC machines via hut proxy too
|
||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
'';
|
||||
}
|
7
m/common/xeon.nix
Normal file
7
m/common/xeon.nix
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
# Provides the base system for a xeon node, not necessarily in the SSF rack.
|
||||
imports = [
|
||||
./base.nix
|
||||
./xeon/console.nix
|
||||
];
|
||||
}
|
14
m/common/xeon/console.nix
Normal file
14
m/common/xeon/console.nix
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
# Restart the serial console
|
||||
systemd.services."serial-getty@ttyS0" = {
|
||||
enable = true;
|
||||
wantedBy = [ "getty.target" ];
|
||||
serviceConfig.Restart = "always";
|
||||
};
|
||||
|
||||
# Enable serial console
|
||||
boot.kernelParams = [
|
||||
"console=tty1"
|
||||
"console=ttyS0,115200"
|
||||
];
|
||||
}
|
37
m/eudy/configuration.nix
Normal file
37
m/eudy/configuration.nix
Normal file
@ -0,0 +1,37 @@
|
||||
{ config, pkgs, lib, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||
|
||||
./kernel/kernel.nix
|
||||
./cpufreq.nix
|
||||
./fs.nix
|
||||
./users.nix
|
||||
../module/debuginfod.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53564b";
|
||||
|
||||
# disable automatic garbage collector
|
||||
nix.gc.automatic = lib.mkForce false;
|
||||
|
||||
# members of the tracing group can use the lttng-provided kernel events
|
||||
# without root permissions
|
||||
users.groups.tracing.members = [ "arocanon" ];
|
||||
|
||||
# set up both ethernet and infiniband ips
|
||||
networking = {
|
||||
hostName = "eudy";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.8";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.8";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
}
|
40
m/eudy/cpufreq.nix
Normal file
40
m/eudy/cpufreq.nix
Normal file
@ -0,0 +1,40 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
# Disable frequency boost by default. Use the intel_pstate driver instead of
|
||||
# acpi_cpufreq driver because the acpi_cpufreq driver does not read the
|
||||
# complete range of P-States [1]. Use the intel_pstate passive mode [2] to
|
||||
# disable HWP, which allows a core to "select P-states by itself". Also, this
|
||||
# disables intel governors, which confusingly, have the same names as the
|
||||
# generic ones but behave differently [3].
|
||||
|
||||
# Essentially, we use the generic governors, but use the intel driver to read
|
||||
# the P-state list.
|
||||
|
||||
# [1] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#intel-pstate-vs-acpi-cpufreq
|
||||
# [2] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#passive-mode
|
||||
# [3] - https://www.kernel.org/doc/html/latest/admin-guide/pm/intel_pstate.html#active-mode
|
||||
# https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html
|
||||
|
||||
# set intel_pstate to passive mode
|
||||
boot.kernelParams = [
|
||||
"intel_pstate=passive"
|
||||
];
|
||||
# Disable frequency boost
|
||||
system.activationScripts = {
|
||||
disableFrequencyBoost.text = ''
|
||||
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
|
||||
'';
|
||||
};
|
||||
|
||||
## disable intel_pstate
|
||||
#boot.kernelParams = [
|
||||
# "intel_pstate=disable"
|
||||
#];
|
||||
## Disable frequency boost
|
||||
#system.activationScripts = {
|
||||
# disableFrequencyBoost.text = ''
|
||||
# echo 0 > /sys/devices/system/cpu/cpufreq/boost
|
||||
# '';
|
||||
#};
|
||||
}
|
13
m/eudy/fs.nix
Normal file
13
m/eudy/fs.nix
Normal file
@ -0,0 +1,13 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
fileSystems."/nix" = {
|
||||
device = "/dev/disk/by-label/optane";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
};
|
||||
fileSystems."/mnt/data" = {
|
||||
device = "/dev/disk/by-label/data";
|
||||
fsType = "ext4";
|
||||
};
|
||||
}
|
10326
m/eudy/kernel/configs/defconfig
Normal file
10326
m/eudy/kernel/configs/defconfig
Normal file
File diff suppressed because it is too large
Load Diff
10333
m/eudy/kernel/configs/lockdep
Normal file
10333
m/eudy/kernel/configs/lockdep
Normal file
File diff suppressed because it is too large
Load Diff
70
m/eudy/kernel/kernel.nix
Normal file
70
m/eudy/kernel/kernel.nix
Normal file
@ -0,0 +1,70 @@
|
||||
{ pkgs, lib, ... }:
|
||||
|
||||
let
|
||||
#fcs-devel = pkgs.linuxPackages_custom {
|
||||
# version = "6.2.8";
|
||||
# src = /mnt/data/kernel/fcs/kernel/src;
|
||||
# configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig;
|
||||
#};
|
||||
|
||||
#fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false;
|
||||
#fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false;
|
||||
#fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true;
|
||||
#fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true;
|
||||
#fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom {
|
||||
# version = "6.2.8";
|
||||
# src = builtins.fetchGit {
|
||||
# url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||
# rev = gitCommit;
|
||||
# ref = "fcs";
|
||||
# };
|
||||
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
|
||||
#};
|
||||
|
||||
kernel = nixos-fcs;
|
||||
|
||||
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
||||
version = "6.2.8";
|
||||
src = builtins.fetchGit {
|
||||
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||
rev = gitCommit;
|
||||
ref = branch;
|
||||
};
|
||||
structuredExtraConfig = with lib.kernel; {
|
||||
# add general custom kernel options here
|
||||
} // lib.optionalAttrs lockStat {
|
||||
LOCK_STAT = yes;
|
||||
} // lib.optionalAttrs preempt {
|
||||
PREEMPT = lib.mkForce yes;
|
||||
PREEMPT_VOLUNTARY = lib.mkForce no;
|
||||
};
|
||||
kernelPatches = [];
|
||||
extraMeta.branch = lib.versions.majorMinor version;
|
||||
}));
|
||||
|
||||
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";};
|
||||
nixos-fcs-lockstat = nixos-fcs.override {
|
||||
lockStat = true;
|
||||
};
|
||||
nixos-fcs-lockstat-preempt = nixos-fcs.override {
|
||||
lockStat = true;
|
||||
preempt = true;
|
||||
};
|
||||
latest = pkgs.linuxPackages_latest;
|
||||
|
||||
in {
|
||||
imports = [
|
||||
./lttng.nix
|
||||
./perf.nix
|
||||
];
|
||||
boot.kernelPackages = lib.mkForce kernel;
|
||||
|
||||
# disable all cpu mitigations
|
||||
boot.kernelParams = [
|
||||
"mitigations=off"
|
||||
];
|
||||
|
||||
# enable memory overcommit, needed to build a taglibc system using nix after
|
||||
# increasing the openblas memory footprint
|
||||
boot.kernel.sysctl."vm.overcommit_memory" = 1;
|
||||
}
|
43
m/eudy/kernel/lttng.nix
Normal file
43
m/eudy/kernel/lttng.nix
Normal file
@ -0,0 +1,43 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
let
|
||||
|
||||
# The lttng btrfs probe crashes at compile time because of an undefined
|
||||
# function. This disables the btrfs tracepoints to avoid the issue.
|
||||
|
||||
# Also enable lockdep tracepoints, this is disabled by default because it
|
||||
# does not work well on architectures other than x86_64 (ARM, I think), as
|
||||
# I was told on the mailing list.
|
||||
lttng-modules-fixed = config.boot.kernelPackages.lttng-modules.overrideAttrs (finalAttrs: previousAttrs: {
|
||||
patchPhase = (lib.optionalString (previousAttrs ? patchPhase) previousAttrs.patchPhase) + ''
|
||||
# disable btrfs
|
||||
substituteInPlace src/probes/Kbuild \
|
||||
--replace " obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o" " #obj-\$(CONFIG_LTTNG) += lttng-probe-btrfs.o"
|
||||
|
||||
# enable lockdep tracepoints
|
||||
substituteInPlace src/probes/Kbuild \
|
||||
--replace "#ifneq (\$(CONFIG_LOCKDEP),)" "ifneq (\$(CONFIG_LOCKDEP),)" \
|
||||
--replace "# obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" " obj-\$(CONFIG_LTTNG) += lttng-probe-lock.o" \
|
||||
--replace "#endif # CONFIG_LOCKDEP" "endif # CONFIG_LOCKDEP"
|
||||
'';
|
||||
});
|
||||
in {
|
||||
|
||||
# add the lttng tools and modules to the system environment
|
||||
boot.extraModulePackages = [ lttng-modules-fixed ];
|
||||
environment.systemPackages = with pkgs; [
|
||||
lttng-tools lttng-ust babeltrace
|
||||
];
|
||||
|
||||
# start the lttng root daemon to manage kernel events
|
||||
systemd.services.lttng-sessiond = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
description = "LTTng session daemon for the root user";
|
||||
serviceConfig = {
|
||||
User = "root";
|
||||
ExecStart = ''
|
||||
${pkgs.lttng-tools}/bin/lttng-sessiond
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
22
m/eudy/kernel/perf.nix
Normal file
22
m/eudy/kernel/perf.nix
Normal file
@ -0,0 +1,22 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
# add the perf tool
|
||||
environment.systemPackages = with pkgs; [
|
||||
config.boot.kernelPackages.perf
|
||||
];
|
||||
|
||||
# allow non-root users to read tracing data from the kernel
|
||||
boot.kernel.sysctl."kernel.perf_event_paranoid" = -2;
|
||||
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
|
||||
|
||||
# specify additional options to the tracefs directory to allow members of the
|
||||
# tracing group to access tracefs.
|
||||
fileSystems."/sys/kernel/tracing" = {
|
||||
options = [
|
||||
"mode=755"
|
||||
"gid=tracing"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
11
m/eudy/users.nix
Normal file
11
m/eudy/users.nix
Normal file
@ -0,0 +1,11 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
security.sudo.extraRules= [{
|
||||
users = [ "arocanon" ];
|
||||
commands = [{
|
||||
command = "ALL" ;
|
||||
options= [ "NOPASSWD" ]; # "SETENV" # Adding the following could be a good idea
|
||||
}];
|
||||
}];
|
||||
}
|
76
m/fox/configuration.nix
Normal file
76
m/fox/configuration.nix
Normal file
@ -0,0 +1,76 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/base.nix
|
||||
../common/xeon/console.nix
|
||||
../module/emulation.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
|
||||
|
||||
# No swap, there is plenty of RAM
|
||||
swapDevices = lib.mkForce [];
|
||||
|
||||
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
|
||||
boot.kernelModules = [ "kvm-amd" ];
|
||||
|
||||
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
|
||||
|
||||
networking = {
|
||||
defaultGateway = "147.83.30.130";
|
||||
nameservers = [ "8.8.8.8" ];
|
||||
hostName = "fox";
|
||||
interfaces.enp1s0f0np0.ipv4.addresses = [
|
||||
{
|
||||
# UPC network
|
||||
# Public IP configuration:
|
||||
# - Hostname: fox.ac.upc.edu
|
||||
# - IP: 147.83.30.141
|
||||
# - Gateway: 147.83.30.130
|
||||
# - NetMask: 255.255.255.192
|
||||
# Private IP configuration for BMC:
|
||||
# - Hostname: fox-ipmi.ac.upc.edu
|
||||
# - IP: 147.83.35.27
|
||||
# - Gateway: 147.83.35.2
|
||||
# - NetMask: 255.255.255.0
|
||||
address = "147.83.30.141";
|
||||
prefixLength = 26; # 255.255.255.192
|
||||
}
|
||||
];
|
||||
extraHosts = ''
|
||||
147.83.30.141 fox.ac.upc.edu
|
||||
147.83.35.27 fox-ipmi.ac.upc.edu
|
||||
'';
|
||||
};
|
||||
|
||||
# Configure Nvidia driver to use with CUDA
|
||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||
hardware.graphics.enable = true;
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
nixpkgs.config.nvidia.acceptLicense = true;
|
||||
services.xserver.videoDrivers = [ "nvidia" ];
|
||||
|
||||
# Mount NVME disks
|
||||
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
|
||||
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
|
||||
|
||||
# Make a /nvme{0,1}/$USER directory for each user.
|
||||
systemd.services.create-nvme-dirs = let
|
||||
# Take only normal users in fox
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "local-fs.target" ];
|
||||
after = [ "local-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
}
|
162
m/hut/blackbox.yml
Normal file
162
m/hut/blackbox.yml
Normal file
@ -0,0 +1,162 @@
|
||||
modules:
|
||||
http_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:23080"
|
||||
skip_resolve_phase_with_proxy: true
|
||||
follow_redirects: true
|
||||
valid_status_codes: [] # Defaults to 2xx
|
||||
method: GET
|
||||
http_with_proxy:
|
||||
prober: http
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:3128"
|
||||
skip_resolve_phase_with_proxy: true
|
||||
http_with_proxy_and_headers:
|
||||
prober: http
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:3128"
|
||||
proxy_connect_header:
|
||||
Proxy-Authorization:
|
||||
- Bearer token
|
||||
http_post_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
headers:
|
||||
Content-Type: application/json
|
||||
body: '{}'
|
||||
http_post_body_file:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
body_file: "/files/body.txt"
|
||||
http_basic_auth_example:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
headers:
|
||||
Host: "login.example.com"
|
||||
basic_auth:
|
||||
username: "username"
|
||||
password: "mysecret"
|
||||
http_2xx_oauth_client_credentials:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
||||
follow_redirects: true
|
||||
preferred_ip_protocol: "ip4"
|
||||
valid_status_codes:
|
||||
- 200
|
||||
- 201
|
||||
oauth2:
|
||||
client_id: "client_id"
|
||||
client_secret: "client_secret"
|
||||
token_url: "https://api.example.com/token"
|
||||
endpoint_params:
|
||||
grant_type: "client_credentials"
|
||||
http_custom_ca_example:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
tls_config:
|
||||
ca_file: "/certs/my_cert.crt"
|
||||
http_gzip:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
compression: gzip
|
||||
http_gzip_with_accept_encoding:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
compression: gzip
|
||||
headers:
|
||||
Accept-Encoding: gzip
|
||||
tls_connect:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
tls: true
|
||||
tcp_connect_example:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
imap_starttls:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- expect: "OK.*STARTTLS"
|
||||
- send: ". STARTTLS"
|
||||
- expect: "OK"
|
||||
- starttls: true
|
||||
- send: ". capability"
|
||||
- expect: "CAPABILITY IMAP4rev1"
|
||||
smtp_starttls:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- expect: "^220 ([^ ]+) ESMTP (.+)$"
|
||||
- send: "EHLO prober\r"
|
||||
- expect: "^250-STARTTLS"
|
||||
- send: "STARTTLS\r"
|
||||
- expect: "^220"
|
||||
- starttls: true
|
||||
- send: "EHLO prober\r"
|
||||
- expect: "^250-AUTH"
|
||||
- send: "QUIT\r"
|
||||
irc_banner_example:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- send: "NICK prober"
|
||||
- send: "USER prober prober prober :prober"
|
||||
- expect: "PING :([^ ]+)"
|
||||
send: "PONG ${1}"
|
||||
- expect: "^:[^ ]+ 001"
|
||||
icmp:
|
||||
prober: icmp
|
||||
timeout: 5s
|
||||
icmp:
|
||||
preferred_ip_protocol: "ip4"
|
||||
dns_udp_example:
|
||||
prober: dns
|
||||
timeout: 5s
|
||||
dns:
|
||||
query_name: "www.prometheus.io"
|
||||
query_type: "A"
|
||||
valid_rcodes:
|
||||
- NOERROR
|
||||
validate_answer_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
fail_if_all_match_regexp:
|
||||
- ".*127.0.0.1"
|
||||
fail_if_not_matches_regexp:
|
||||
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
|
||||
fail_if_none_matches_regexp:
|
||||
- "127.0.0.1"
|
||||
validate_authority_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
validate_additional_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
dns_soa:
|
||||
prober: dns
|
||||
dns:
|
||||
query_name: "prometheus.io"
|
||||
query_type: "SOA"
|
||||
dns_tcp_example:
|
||||
prober: dns
|
||||
dns:
|
||||
transport_protocol: "tcp" # defaults to "udp"
|
||||
preferred_ip_protocol: "ip4" # defaults to "ip6"
|
||||
query_name: "www.prometheus.io"
|
69
m/hut/configuration.nix
Normal file
69
m/hut/configuration.nix
Normal file
@ -0,0 +1,69 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
|
||||
../module/ceph.nix
|
||||
../module/debuginfod.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
./gitlab-runner.nix
|
||||
./monitoring.nix
|
||||
./nfs.nix
|
||||
./slurm-server.nix
|
||||
./nix-serve.nix
|
||||
./public-inbox.nix
|
||||
./gitea.nix
|
||||
./msmtp.nix
|
||||
./postgresql.nix
|
||||
./nginx.nix
|
||||
./p.nix
|
||||
#./pxe.nix
|
||||
];
|
||||
|
||||
# Select the disk using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f";
|
||||
|
||||
fileSystems = {
|
||||
"/" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nvme";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
options = [ "noatime" ];
|
||||
};
|
||||
|
||||
"/boot" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nixos-boot";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
};
|
||||
};
|
||||
|
||||
networking = {
|
||||
hostName = "hut";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.7";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.7";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Accept all proxy traffic from compute nodes but not the login
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
|
||||
'';
|
||||
# Flush all rules and chains on stop so it won't break on start
|
||||
extraStopCommands = ''
|
||||
iptables -F
|
||||
iptables -X
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
# Allow proxy to bind to the ethernet interface
|
||||
services.openssh.settings.GatewayPorts = "clientspecified";
|
||||
}
|
63
m/hut/gitea.nix
Normal file
63
m/hut/gitea.nix
Normal file
@ -0,0 +1,63 @@
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
age.secrets.giteaRunnerToken.file = ../../secrets/gitea-runner-token.age;
|
||||
|
||||
services.gitea = {
|
||||
enable = true;
|
||||
appName = "Gitea in the jungle";
|
||||
|
||||
settings = {
|
||||
server = {
|
||||
ROOT_URL = "https://jungle.bsc.es/git/";
|
||||
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
|
||||
LANDING_PAGE = "explore";
|
||||
};
|
||||
metrics.ENABLED = true;
|
||||
service = {
|
||||
REGISTER_MANUAL_CONFIRM = true;
|
||||
ENABLE_NOTIFY_MAIL = true;
|
||||
};
|
||||
log.LEVEL = "Warn";
|
||||
|
||||
mailer = {
|
||||
ENABLED = true;
|
||||
FROM = "jungle-robot@bsc.es";
|
||||
PROTOCOL = "sendmail";
|
||||
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
|
||||
SENDMAIL_ARGS = "--";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
services.gitea-actions-runner.instances = {
|
||||
runrun = {
|
||||
enable = true;
|
||||
name = "runrun";
|
||||
url = "https://jungle.bsc.es/git/";
|
||||
tokenFile = config.age.secrets.giteaRunnerToken.path;
|
||||
labels = [ "native:host" ];
|
||||
settings.runner.capacity = 8;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.gitea-runner-runrun = {
|
||||
path = [ "/run/current-system/sw" ];
|
||||
serviceConfig = {
|
||||
# DynamicUser doesn't work well with SSH
|
||||
DynamicUser = lib.mkForce false;
|
||||
User = "gitea-runner";
|
||||
Group = "gitea-runner";
|
||||
};
|
||||
};
|
||||
|
||||
users.users.gitea-runner = {
|
||||
isSystemUser = true;
|
||||
home = "/var/lib/gitea-runner";
|
||||
description = "Gitea Runner";
|
||||
group = "gitea-runner";
|
||||
extraGroups = [ "docker" ];
|
||||
createHome = true;
|
||||
};
|
||||
users.groups.gitea-runner = {};
|
||||
}
|
||||
|
126
m/hut/gitlab-runner.nix
Normal file
126
m/hut/gitlab-runner.nix
Normal file
@ -0,0 +1,126 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
|
||||
{
|
||||
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age;
|
||||
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age;
|
||||
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
|
||||
|
||||
services.gitlab-runner = {
|
||||
enable = true;
|
||||
settings.concurrent = 5;
|
||||
services = let
|
||||
common-shell = {
|
||||
executor = "shell";
|
||||
environmentVariables = {
|
||||
SHELL = "${pkgs.bash}/bin/bash";
|
||||
};
|
||||
};
|
||||
common-docker = {
|
||||
executor = "docker";
|
||||
dockerImage = "debian:stable";
|
||||
registrationFlags = [
|
||||
"--docker-network-mode host"
|
||||
];
|
||||
environmentVariables = {
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
};
|
||||
};
|
||||
in {
|
||||
# For pm.bsc.es/gitlab
|
||||
gitlab-pm-shell = common-shell // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path;
|
||||
};
|
||||
gitlab-pm-docker = common-docker // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
|
||||
};
|
||||
|
||||
gitlab-bsc-docker = {
|
||||
# gitlab.bsc.es still uses the old token mechanism
|
||||
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
|
||||
tagList = [ "docker" "hut" ];
|
||||
environmentVariables = {
|
||||
# We cannot access the hut local interface from docker, so we connect
|
||||
# to hut directly via the ethernet one.
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
};
|
||||
executor = "docker";
|
||||
dockerImage = "alpine";
|
||||
dockerVolumes = [
|
||||
"/nix/store:/nix/store:ro"
|
||||
"/nix/var/nix/db:/nix/var/nix/db:ro"
|
||||
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
|
||||
];
|
||||
dockerExtraHosts = [
|
||||
# Required to pass the proxy via hut
|
||||
"hut:10.0.40.7"
|
||||
];
|
||||
dockerDisableCache = true;
|
||||
registrationFlags = [
|
||||
# Increase build log length to 64 MiB
|
||||
"--output-limit 65536"
|
||||
];
|
||||
preBuildScript = pkgs.writeScript "setup-container" ''
|
||||
mkdir -p -m 0755 /nix/var/log/nix/drvs
|
||||
mkdir -p -m 0755 /nix/var/nix/gcroots
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles
|
||||
mkdir -p -m 0755 /nix/var/nix/temproots
|
||||
mkdir -p -m 0755 /nix/var/nix/userpool
|
||||
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
|
||||
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
|
||||
mkdir -p -m 0700 "$HOME/.nix-defexpr"
|
||||
mkdir -p -m 0700 "$HOME/.ssh"
|
||||
cat > "$HOME/.ssh/config" << EOF
|
||||
Host bscpm04.bsc.es gitlab-internal.bsc.es
|
||||
User git
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
EOF
|
||||
cat >> "$HOME/.ssh/known_hosts" << EOF
|
||||
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
|
||||
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
|
||||
EOF
|
||||
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
|
||||
# Required to load SSL certificate paths
|
||||
. ${pkgs.cacert}/nix-support/setup-hook
|
||||
'';
|
||||
environmentVariables = {
|
||||
ENV = "/etc/profile";
|
||||
USER = "root";
|
||||
NIX_REMOTE = "daemon";
|
||||
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# DOCKER* chains are useless, override at FORWARD and nixos-fw
|
||||
networking.firewall.extraCommands = ''
|
||||
# Don't forward any traffic from docker
|
||||
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
|
||||
|
||||
# Allow incoming traffic from docker to 23080
|
||||
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
|
||||
'';
|
||||
|
||||
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
|
||||
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
|
||||
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";
|
||||
systemd.services.gitlab-runner.serviceConfig.Group = "gitlab-runner";
|
||||
systemd.services.gitlab-runner.serviceConfig.ExecStart = lib.mkForce
|
||||
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
|
||||
|
||||
users.users.gitlab-runner = {
|
||||
uid = config.ids.uids.gitlab-runner;
|
||||
#isNormalUser = true;
|
||||
home = "/var/lib/gitlab-runner";
|
||||
description = "Gitlab Runner";
|
||||
group = "gitlab-runner";
|
||||
extraGroups = [ "docker" ];
|
||||
createHome = true;
|
||||
};
|
||||
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
|
||||
}
|
31
m/hut/gpfs-probe.nix
Normal file
31
m/hut/gpfs-probe.nix
Normal file
@ -0,0 +1,31 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
let
|
||||
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
|
||||
''
|
||||
cp ${./gpfs-probe.sh} $out;
|
||||
chmod +x $out
|
||||
''
|
||||
;
|
||||
in
|
||||
{
|
||||
# Use a new user to handle the SSH keys
|
||||
users.groups.ssh-robot = { };
|
||||
users.users.ssh-robot = {
|
||||
description = "SSH Robot";
|
||||
isNormalUser = true;
|
||||
home = "/var/lib/ssh-robot";
|
||||
};
|
||||
|
||||
systemd.services.gpfs-probe = {
|
||||
description = "Daemon to report GPFS latency via SSH";
|
||||
path = [ pkgs.openssh pkgs.netcat ];
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "default.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
|
||||
User = "ssh-robot";
|
||||
Group = "ssh-robot";
|
||||
};
|
||||
};
|
||||
}
|
18
m/hut/gpfs-probe.sh
Executable file
18
m/hut/gpfs-probe.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/sh
|
||||
|
||||
N=500
|
||||
|
||||
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
|
||||
|
||||
if [ -z "$t" ]; then
|
||||
t="5.00"
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||
|
||||
# HELP gpfs_touch_latency Time to create $N files.
|
||||
# TYPE gpfs_touch_latency gauge
|
||||
gpfs_touch_latency $t
|
||||
EOF
|
272
m/hut/monitoring.nix
Normal file
272
m/hut/monitoring.nix
Normal file
@ -0,0 +1,272 @@
|
||||
{ config, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../module/slurm-exporter.nix
|
||||
../module/meteocat-exporter.nix
|
||||
../module/upc-qaire-exporter.nix
|
||||
./gpfs-probe.nix
|
||||
./nix-daemon-exporter.nix
|
||||
];
|
||||
|
||||
age.secrets.grafanaJungleRobotPassword = {
|
||||
file = ../../secrets/jungle-robot-password.age;
|
||||
owner = "grafana";
|
||||
mode = "400";
|
||||
};
|
||||
|
||||
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings = {
|
||||
server = {
|
||||
domain = "jungle.bsc.es";
|
||||
root_url = "%(protocol)s://%(domain)s/grafana";
|
||||
serve_from_sub_path = true;
|
||||
http_port = 2342;
|
||||
http_addr = "127.0.0.1";
|
||||
};
|
||||
smtp = {
|
||||
enabled = true;
|
||||
from_address = "jungle-robot@bsc.es";
|
||||
user = "jungle-robot";
|
||||
# Read the password from a file, which is only readable by the grafana user
|
||||
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
|
||||
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
|
||||
host = "mail.bsc.es:465";
|
||||
startTLS_policy = "NoStartTLS";
|
||||
};
|
||||
feature_toggles.publicDashboards = true;
|
||||
"auth.anonymous".enabled = true;
|
||||
log.level = "warn";
|
||||
};
|
||||
};
|
||||
|
||||
# Make grafana alerts also use the proxy
|
||||
systemd.services.grafana.environment = config.networking.proxy.envVars;
|
||||
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
port = 9001;
|
||||
retentionTime = "5y";
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
|
||||
systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
|
||||
systemd.services.prometheus-ipmi-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
|
||||
|
||||
# We need access to the devices to monitor the disk space
|
||||
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
|
||||
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
|
||||
|
||||
virtualisation.docker.daemon.settings = {
|
||||
metrics-addr = "127.0.0.1:9323";
|
||||
};
|
||||
|
||||
# Required to allow the smartctl exporter to read the nvme0 character device,
|
||||
# see the commit message on:
|
||||
# https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80
|
||||
services.udev.extraRules = ''
|
||||
SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk"
|
||||
'';
|
||||
|
||||
services.prometheus = {
|
||||
|
||||
exporters = {
|
||||
ipmi = {
|
||||
enable = true;
|
||||
group = "root";
|
||||
user = "root";
|
||||
configFile = config.age.secrets.ipmiYml.path;
|
||||
# extraFlags = [ "--log.level=debug" ];
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" "logind" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
smartctl = {
|
||||
enable = true;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
blackbox = {
|
||||
enable = true;
|
||||
listenAddress = "127.0.0.1";
|
||||
configFile = ./blackbox.yml;
|
||||
};
|
||||
};
|
||||
|
||||
scrapeConfigs = [
|
||||
{
|
||||
job_name = "xeon07";
|
||||
static_configs = [{
|
||||
targets = [
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.node.port}"
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.ipmi.port}"
|
||||
"127.0.0.1:9323"
|
||||
"127.0.0.1:9252"
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
||||
"127.0.0.1:9341" # Slurm exporter
|
||||
"127.0.0.1:9966" # GPFS custom exporter
|
||||
"127.0.0.1:9999" # Nix-daemon custom exporter
|
||||
"127.0.0.1:9929" # Meteocat custom exporter
|
||||
"127.0.0.1:9928" # UPC Qaire custom exporter
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
||||
];
|
||||
}];
|
||||
}
|
||||
{
|
||||
job_name = "ceph";
|
||||
static_configs = [{
|
||||
targets = [
|
||||
"10.0.40.40:9283" # Ceph statistics
|
||||
"10.0.40.40:9002" # Node exporter
|
||||
"10.0.40.42:9002" # Node exporter
|
||||
];
|
||||
}];
|
||||
}
|
||||
{
|
||||
job_name = "blackbox-http";
|
||||
metrics_path = "/probe";
|
||||
params = { module = [ "http_2xx" ]; };
|
||||
static_configs = [{
|
||||
targets = [
|
||||
"https://www.google.com/robots.txt"
|
||||
"https://pm.bsc.es/"
|
||||
"https://pm.bsc.es/gitlab/"
|
||||
"https://jungle.bsc.es/"
|
||||
"https://gitlab.bsc.es/"
|
||||
];
|
||||
}];
|
||||
relabel_configs = [
|
||||
{
|
||||
# Takes the address and sets it in the "target=<xyz>" URL parameter
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
# Sets the "instance" label with the remote host we are querying
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
{
|
||||
# Shows the host target address instead of the blackbox address
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "blackbox-icmp";
|
||||
metrics_path = "/probe";
|
||||
params = { module = [ "icmp" ]; };
|
||||
static_configs = [{
|
||||
targets = [
|
||||
"1.1.1.1"
|
||||
"8.8.8.8"
|
||||
"ssfhead"
|
||||
"anella-bsc.cesca.cat"
|
||||
"upc-anella.cesca.cat"
|
||||
"fox.ac.upc.edu"
|
||||
"arenys5.ac.upc.edu"
|
||||
];
|
||||
}];
|
||||
relabel_configs = [
|
||||
{
|
||||
# Takes the address and sets it in the "target=<xyz>" URL parameter
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
# Sets the "instance" label with the remote host we are querying
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
{
|
||||
# Shows the host target address instead of the blackbox address
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "gitea";
|
||||
static_configs = [{ targets = [ "127.0.0.1:3000" ]; }];
|
||||
}
|
||||
{
|
||||
# Scrape the IPMI info of the hosts remotely via LAN
|
||||
job_name = "ipmi-lan";
|
||||
scrape_interval = "1m";
|
||||
scrape_timeout = "30s";
|
||||
metrics_path = "/ipmi";
|
||||
scheme = "http";
|
||||
relabel_configs = [
|
||||
{
|
||||
# Takes the address and sets it in the "target=<xyz>" URL parameter
|
||||
source_labels = [ "__address__" ];
|
||||
separator = ";";
|
||||
regex = "(.*)(:80)?";
|
||||
target_label = "__param_target";
|
||||
replacement = "\${1}";
|
||||
action = "replace";
|
||||
}
|
||||
{
|
||||
# Sets the "instance" label with the remote host we are querying
|
||||
source_labels = [ "__param_target" ];
|
||||
separator = ";";
|
||||
regex = "(.*)-ipmi"; # Remove "-ipmi" at the end
|
||||
target_label = "instance";
|
||||
replacement = "\${1}";
|
||||
action = "replace";
|
||||
}
|
||||
{
|
||||
# Sets the fixed "module=lan" URL param
|
||||
separator = ";";
|
||||
regex = "(.*)";
|
||||
target_label = "__param_module";
|
||||
replacement = "lan";
|
||||
action = "replace";
|
||||
}
|
||||
{
|
||||
# Sets the target to query as the localhost IPMI exporter
|
||||
separator = ";";
|
||||
regex = ".*";
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:9290";
|
||||
action = "replace";
|
||||
}
|
||||
];
|
||||
|
||||
# Load the list of targets from another file
|
||||
file_sd_configs = [
|
||||
{
|
||||
files = [ "${./targets.yml}" ];
|
||||
refresh_interval = "30s";
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "ipmi-raccoon";
|
||||
metrics_path = "/ipmi";
|
||||
static_configs = [
|
||||
{ targets = [ "127.0.0.1:9291" ]; }
|
||||
];
|
||||
params = {
|
||||
target = [ "84.88.51.142" ];
|
||||
module = [ "raccoon" ];
|
||||
};
|
||||
}
|
||||
{
|
||||
job_name = "raccoon";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "127.0.0.1:19002" ]; # Node exporter
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
24
m/hut/msmtp.nix
Normal file
24
m/hut/msmtp.nix
Normal file
@ -0,0 +1,24 @@
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
age.secrets.jungleRobotPassword = {
|
||||
file = ../../secrets/jungle-robot-password.age;
|
||||
group = "gitea";
|
||||
mode = "440";
|
||||
};
|
||||
|
||||
programs.msmtp = {
|
||||
enable = true;
|
||||
accounts = {
|
||||
default = {
|
||||
auth = true;
|
||||
tls = true;
|
||||
tls_starttls = false;
|
||||
port = 465;
|
||||
host = "mail.bsc.es";
|
||||
user = "jungle-robot";
|
||||
passwordeval = "cat ${config.age.secrets.jungleRobotPassword.path}";
|
||||
from = "jungle-robot@bsc.es";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
73
m/hut/nginx.nix
Normal file
73
m/hut/nginx.nix
Normal file
@ -0,0 +1,73 @@
|
||||
{ theFlake, pkgs, ... }:
|
||||
let
|
||||
website = pkgs.stdenv.mkDerivation {
|
||||
name = "jungle-web";
|
||||
src = theFlake;
|
||||
buildInputs = [ pkgs.hugo ];
|
||||
buildPhase = ''
|
||||
cd web
|
||||
rm -rf public/
|
||||
hugo
|
||||
'';
|
||||
installPhase = ''
|
||||
cp -r public $out
|
||||
'';
|
||||
# Don't mess with doc/
|
||||
dontFixup = true;
|
||||
};
|
||||
in
|
||||
{
|
||||
networking.firewall.allowedTCPPorts = [ 80 ];
|
||||
services.nginx = {
|
||||
enable = true;
|
||||
virtualHosts."jungle.bsc.es" = {
|
||||
root = "${website}";
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 80;
|
||||
}
|
||||
];
|
||||
extraConfig = ''
|
||||
set_real_ip_from 127.0.0.1;
|
||||
set_real_ip_from 84.88.52.107;
|
||||
real_ip_recursive on;
|
||||
real_ip_header X-Forwarded-For;
|
||||
|
||||
location /git {
|
||||
rewrite ^/git$ / break;
|
||||
rewrite ^/git/(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:3000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /cache {
|
||||
rewrite ^/cache/(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:5000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /lists {
|
||||
proxy_pass http://127.0.0.1:8081;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /grafana {
|
||||
proxy_pass http://127.0.0.1:2342;
|
||||
proxy_redirect http:// $scheme://;
|
||||
proxy_set_header Host $host;
|
||||
# Websockets
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
location ~ ^/~(.+?)(/.*)?$ {
|
||||
alias /ceph/home/$1/public_html$2;
|
||||
index index.html index.htm;
|
||||
autoindex on;
|
||||
absolute_redirect off;
|
||||
}
|
||||
location /p/ {
|
||||
alias /ceph/p/;
|
||||
}
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
26
m/hut/nix-daemon-builds.sh
Executable file
26
m/hut/nix-daemon-builds.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Locate nix daemon pid
|
||||
nd=$(pgrep -o nix-daemon)
|
||||
|
||||
# Locate children of nix-daemon
|
||||
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
|
||||
|
||||
# For each child, locate its 2nd level children
|
||||
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
|
||||
|
||||
cat <<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||
|
||||
# HELP nix_daemon_build Nix daemon derivation build state.
|
||||
# TYPE nix_daemon_build gauge
|
||||
EOF
|
||||
|
||||
for pid in $pids2; do
|
||||
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
|
||||
user=$(ps -o uname= -p "$pid")
|
||||
if [ -n "$name" -a -n "$user" ]; then
|
||||
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
|
||||
fi
|
||||
done
|
23
m/hut/nix-daemon-exporter.nix
Normal file
23
m/hut/nix-daemon-exporter.nix
Normal file
@ -0,0 +1,23 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
let
|
||||
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
|
||||
''
|
||||
cp ${./nix-daemon-builds.sh} $out;
|
||||
chmod +x $out
|
||||
''
|
||||
;
|
||||
in
|
||||
{
|
||||
systemd.services.nix-daemon-exporter = {
|
||||
description = "Daemon to export nix-daemon metrics";
|
||||
path = [ pkgs.procps pkgs.ripgrep ];
|
||||
wantedBy = [ "default.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
|
||||
# Root is needed to read the environment of other processes; potentially unsafe
|
||||
User = "root";
|
||||
Group = "root";
|
||||
};
|
||||
};
|
||||
}
|
16
m/hut/nix-serve.nix
Normal file
16
m/hut/nix-serve.nix
Normal file
@ -0,0 +1,16 @@
|
||||
{ config, ... }:
|
||||
|
||||
{
|
||||
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
|
||||
|
||||
services.nix-serve = {
|
||||
enable = true;
|
||||
# Only listen locally, as we serve it via ssh
|
||||
bindAddress = "127.0.0.1";
|
||||
port = 5000;
|
||||
|
||||
secretKeyFile = config.age.secrets.nixServe.path;
|
||||
# Public key:
|
||||
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
|
||||
};
|
||||
}
|
43
m/hut/p.nix
Normal file
43
m/hut/p.nix
Normal file
@ -0,0 +1,43 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
let
|
||||
p = pkgs.writeShellScriptBin "p" ''
|
||||
set -e
|
||||
cd /ceph
|
||||
pastedir="p/$USER"
|
||||
mkdir -p "$pastedir"
|
||||
|
||||
ext="txt"
|
||||
|
||||
if [ -n "$1" ]; then
|
||||
ext="$1"
|
||||
fi
|
||||
|
||||
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
|
||||
|
||||
cat > "$out"
|
||||
chmod go+r "$out"
|
||||
echo "https://jungle.bsc.es/$out"
|
||||
'';
|
||||
in
|
||||
{
|
||||
environment.systemPackages = with pkgs; [ p ];
|
||||
|
||||
# Make sure we have a directory per user. We cannot use the nice
|
||||
# systemd-tmpfiles-setup.service service because this is a remote FS, and it
|
||||
# may not be mounted when it runs.
|
||||
systemd.services.create-paste-dirs = let
|
||||
# Take only normal users in hut
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "remote-fs.target" ];
|
||||
after = [ "remote-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
}
|
19
m/hut/postgresql.nix
Normal file
19
m/hut/postgresql.nix
Normal file
@ -0,0 +1,19 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
services.postgresql = {
|
||||
enable = true;
|
||||
ensureDatabases = [ "perftestsdb" ];
|
||||
ensureUsers = [
|
||||
{ name = "anavarro"; ensureClauses.superuser = true; }
|
||||
{ name = "rarias"; ensureClauses.superuser = true; }
|
||||
{ name = "grafana"; }
|
||||
];
|
||||
authentication = ''
|
||||
#type database DBuser auth-method
|
||||
local perftestsdb rarias trust
|
||||
local perftestsdb anavarro trust
|
||||
local perftestsdb grafana trust
|
||||
'';
|
||||
};
|
||||
}
|
79
m/hut/public-inbox.css
Normal file
79
m/hut/public-inbox.css
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* CC0-1.0 <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
|
||||
* Dark color scheme using 216 web-safe colors, inspired
|
||||
* somewhat by the default color scheme in mutt.
|
||||
* It reduces eyestrain for me, and energy usage for all:
|
||||
* https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
|
||||
*/
|
||||
|
||||
* {
|
||||
font-size: 14px;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
padding: 10px;
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
hr {
|
||||
margin: 30px 0;
|
||||
}
|
||||
|
||||
body {
|
||||
max-width: 120ex; /* 120 columns wide */
|
||||
margin: 50px auto;
|
||||
}
|
||||
|
||||
/*
|
||||
* Underlined links add visual noise which make them hard-to-read.
|
||||
* Use colors to make them stand out, instead.
|
||||
*/
|
||||
a:link {
|
||||
color: #007;
|
||||
text-decoration: none;
|
||||
}
|
||||
a:visited {
|
||||
color:#504;
|
||||
}
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* quoted text in emails gets a different color */
|
||||
*.q { color:gray }
|
||||
|
||||
/*
|
||||
* these may be used with cgit <https://git.zx2c4.com/cgit/>, too.
|
||||
* (cgit uses <div>, public-inbox uses <span>)
|
||||
*/
|
||||
*.add { color:darkgreen } /* diff post-image lines */
|
||||
*.del { color:darkred } /* diff pre-image lines */
|
||||
*.head { color:black } /* diff header (metainformation) */
|
||||
*.hunk { color:gray } /* diff hunk-header */
|
||||
|
||||
/*
|
||||
* highlight 3.x colors (tested 3.18) for displaying blobs.
|
||||
* This doesn't use most of the colors available, as I find too
|
||||
* many colors overwhelming, so the default is commented out.
|
||||
*/
|
||||
.hl.num { color:#f30 } /* number */
|
||||
.hl.esc { color:#f0f } /* escape character */
|
||||
.hl.str { color:#f30 } /* string */
|
||||
.hl.ppc { color:#f0f } /* preprocessor */
|
||||
.hl.pps { color:#f30 } /* preprocessor string */
|
||||
.hl.slc { color:#09f } /* single-line comment */
|
||||
.hl.com { color:#09f } /* multi-line comment */
|
||||
/* .hl.opt { color:#ccc } */ /* operator */
|
||||
/* .hl.ipl { color:#ccc } */ /* interpolation */
|
||||
|
||||
/* keyword groups kw[a-z] */
|
||||
.hl.kwa { color:#ff0 }
|
||||
.hl.kwb { color:#0f0 }
|
||||
.hl.kwc { color:#ff0 }
|
||||
/* .hl.kwd { color:#ccc } */
|
||||
|
||||
/* line-number (unused by public-inbox) */
|
||||
/* .hl.lin { color:#ccc } */
|
||||
|
47
m/hut/public-inbox.nix
Normal file
47
m/hut/public-inbox.nix
Normal file
@ -0,0 +1,47 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
services.public-inbox = {
|
||||
enable = true;
|
||||
http = {
|
||||
enable = true;
|
||||
port = 8081;
|
||||
mounts = [ "/lists" ];
|
||||
};
|
||||
settings.publicinbox = {
|
||||
css = [ "${./public-inbox.css}" ];
|
||||
wwwlisting = "all";
|
||||
};
|
||||
inboxes = {
|
||||
bscpkgs = {
|
||||
url = "https://jungle.bsc.es/lists/bscpkgs";
|
||||
address = [ "~rodarima/bscpkgs@lists.sr.ht" ];
|
||||
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
|
||||
description = "Patches for bscpkgs";
|
||||
listid = "~rodarima/bscpkgs.lists.sr.ht";
|
||||
};
|
||||
jungle = {
|
||||
url = "https://jungle.bsc.es/lists/jungle";
|
||||
address = [ "~rodarima/jungle@lists.sr.ht" ];
|
||||
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
|
||||
description = "Patches for jungle";
|
||||
listid = "~rodarima/jungle.lists.sr.ht";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# We need access to the network for the watch service, as we will fetch the
|
||||
# emails directly from the IMAP server.
|
||||
systemd.services.public-inbox-watch.serviceConfig = {
|
||||
PrivateNetwork = lib.mkForce false;
|
||||
RestrictAddressFamilies = lib.mkForce [ "AF_UNIX" "AF_INET" "AF_INET6" ];
|
||||
KillSignal = "SIGKILL"; # Avoid slow shutdown
|
||||
|
||||
# Required for chmod(..., 02750) on directories by git, from
|
||||
# systemd.exec(8):
|
||||
# > Note that this restricts marking of any type of file system object with
|
||||
# > these bits, including both regular files and directories (where the SGID
|
||||
# > is a different meaning than for files, see documentation).
|
||||
RestrictSUIDSGID = lib.mkForce false;
|
||||
};
|
||||
}
|
35
m/hut/pxe.nix
Normal file
35
m/hut/pxe.nix
Normal file
@ -0,0 +1,35 @@
|
||||
{ theFlake, pkgs, ... }:
|
||||
|
||||
# This module describes a script that can launch the pixiecore daemon to serve a
|
||||
# NixOS image via PXE to a node to directly boot from there, without requiring a
|
||||
# working disk.
|
||||
|
||||
let
|
||||
# The host config must have the netboot-minimal.nix module too
|
||||
host = theFlake.nixosConfigurations.lake2;
|
||||
sys = host.config.system;
|
||||
build = sys.build;
|
||||
kernel = "${build.kernel}/bzImage";
|
||||
initrd = "${build.netbootRamdisk}/initrd";
|
||||
init = "${build.toplevel}/init";
|
||||
|
||||
script = pkgs.writeShellScriptBin "pixiecore-helper" ''
|
||||
#!/usr/bin/env bash -x
|
||||
|
||||
${pkgs.pixiecore}/bin/pixiecore \
|
||||
boot ${kernel} ${initrd} --cmdline "init=${init} loglevel=4" \
|
||||
--debug --dhcp-no-bind --port 64172 --status-port 64172 "$@"
|
||||
'';
|
||||
in
|
||||
{
|
||||
## We need a DHCP server to provide the IP
|
||||
#services.dnsmasq = {
|
||||
# enable = true;
|
||||
# settings = {
|
||||
# domain-needed = true;
|
||||
# dhcp-range = [ "192.168.0.2,192.168.0.254" ];
|
||||
# };
|
||||
#};
|
||||
|
||||
environment.systemPackages = [ script ];
|
||||
}
|
7
m/hut/slurm-server.nix
Normal file
7
m/hut/slurm-server.nix
Normal file
@ -0,0 +1,7 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
services.slurm = {
|
||||
server.enable = true;
|
||||
};
|
||||
}
|
15
m/hut/targets.yml
Normal file
15
m/hut/targets.yml
Normal file
@ -0,0 +1,15 @@
|
||||
- targets:
|
||||
- owl1-ipmi
|
||||
- owl2-ipmi
|
||||
- xeon03-ipmi
|
||||
- xeon04-ipmi
|
||||
- koro-ipmi
|
||||
- xeon06-ipmi
|
||||
- hut-ipmi
|
||||
- eudy-ipmi
|
||||
# Storage
|
||||
- bay-ipmi
|
||||
- oss01-ipmi
|
||||
- lake2-ipmi
|
||||
labels:
|
||||
job: ipmi-lan
|
35
m/koro/configuration.nix
Normal file
35
m/koro/configuration.nix
Normal file
@ -0,0 +1,35 @@
|
||||
{ config, pkgs, lib, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||
|
||||
../eudy/cpufreq.nix
|
||||
../eudy/users.nix
|
||||
./kernel.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5376d2";
|
||||
|
||||
# disable automatic garbage collector
|
||||
nix.gc.automatic = lib.mkForce false;
|
||||
|
||||
# members of the tracing group can use the lttng-provided kernel events
|
||||
# without root permissions
|
||||
users.groups.tracing.members = [ "arocanon" "vlopez" ];
|
||||
|
||||
# set up both ethernet and infiniband ips
|
||||
networking = {
|
||||
hostName = "koro";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.5";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.5";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
}
|
70
m/koro/kernel.nix
Normal file
70
m/koro/kernel.nix
Normal file
@ -0,0 +1,70 @@
|
||||
{ pkgs, lib, ... }:
|
||||
|
||||
let
|
||||
#fcs-devel = pkgs.linuxPackages_custom {
|
||||
# version = "6.2.8";
|
||||
# src = /mnt/data/kernel/fcs/kernel/src;
|
||||
# configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig;
|
||||
#};
|
||||
|
||||
#fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false;
|
||||
#fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false;
|
||||
#fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true;
|
||||
#fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true;
|
||||
#fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom {
|
||||
# version = "6.2.8";
|
||||
# src = builtins.fetchGit {
|
||||
# url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||
# rev = gitCommit;
|
||||
# ref = "fcs";
|
||||
# };
|
||||
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
|
||||
#};
|
||||
|
||||
kernel = nixos-fcs;
|
||||
|
||||
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
||||
version = "6.2.8";
|
||||
src = builtins.fetchGit {
|
||||
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||
rev = gitCommit;
|
||||
ref = branch;
|
||||
};
|
||||
structuredExtraConfig = with lib.kernel; {
|
||||
# add general custom kernel options here
|
||||
} // lib.optionalAttrs lockStat {
|
||||
LOCK_STAT = yes;
|
||||
} // lib.optionalAttrs preempt {
|
||||
PREEMPT = lib.mkForce yes;
|
||||
PREEMPT_VOLUNTARY = lib.mkForce no;
|
||||
};
|
||||
kernelPatches = [];
|
||||
extraMeta.branch = lib.versions.majorMinor version;
|
||||
}));
|
||||
|
||||
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";};
|
||||
nixos-fcs-lockstat = nixos-fcs.override {
|
||||
lockStat = true;
|
||||
};
|
||||
nixos-fcs-lockstat-preempt = nixos-fcs.override {
|
||||
lockStat = true;
|
||||
preempt = true;
|
||||
};
|
||||
latest = pkgs.linuxPackages_latest;
|
||||
|
||||
in {
|
||||
imports = [
|
||||
../eudy/kernel/lttng.nix
|
||||
../eudy/kernel/perf.nix
|
||||
];
|
||||
boot.kernelPackages = lib.mkForce kernel;
|
||||
|
||||
# disable all cpu mitigations
|
||||
boot.kernelParams = [
|
||||
"mitigations=off"
|
||||
];
|
||||
|
||||
# enable memory overcommit, needed to build a taglibc system using nix after
|
||||
# increasing the openblas memory footprint
|
||||
boot.kernel.sysctl."vm.overcommit_memory" = 1;
|
||||
}
|
83
m/lake2/configuration.nix
Normal file
83
m/lake2/configuration.nix
Normal file
@ -0,0 +1,83 @@
|
||||
{ config, pkgs, lib, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../module/monitoring.nix
|
||||
];
|
||||
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"kernel.yama.ptrace_scope" = lib.mkForce "1";
|
||||
};
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
ceph
|
||||
];
|
||||
|
||||
services.ceph = {
|
||||
enable = true;
|
||||
global = {
|
||||
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
||||
monHost = "10.0.40.40";
|
||||
monInitialMembers = "bay";
|
||||
clusterNetwork = "10.0.40.40/24"; # Use Ethernet only
|
||||
};
|
||||
osd = {
|
||||
enable = true;
|
||||
# One daemon per NVME disk
|
||||
daemons = [ "4" "5" "6" "7" ];
|
||||
extraConfig = {
|
||||
"osd crush chooseleaf type" = "0";
|
||||
"osd journal size" = "10000";
|
||||
"osd pool default min size" = "2";
|
||||
"osd pool default pg num" = "200";
|
||||
"osd pool default pgp num" = "200";
|
||||
"osd pool default size" = "3";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
networking = {
|
||||
hostName = "lake2";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.42";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.42";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Accept all incoming TCP traffic from bay
|
||||
iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept
|
||||
# Accept monitoring requests from hut
|
||||
iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept
|
||||
# Accept all Ceph traffic from the local network
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
# Missing service for volumes, see:
|
||||
# https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/
|
||||
# One-shot unit that activates all Ceph LVM volumes once the local file
# systems are available.
systemd.services.ceph-volume = {
  enable = true;
  description = "Ceph Volume activation";
  unitConfig = {
    After = "local-fs.target";
    Wants = "local-fs.target";
  };
  path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ];
  serviceConfig = {
    # Type= is a [Service] setting. When placed in the [Unit] section systemd
    # ignores it and the unit silently runs with the default service type.
    Type = "oneshot";
    KillMode = "none";
    Environment = "CEPH_VOLUME_TIMEOUT=10000";
    ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'";
    # Disable the systemd start timeout; the activation is bounded by the
    # `timeout` command in ExecStart instead.
    TimeoutSec = "0";
  };
  wantedBy = [ "multi-user.target" ];
};
|
||||
}
|
70
m/map.nix
Normal file
70
m/map.nix
Normal file
@ -0,0 +1,70 @@
|
||||
{
|
||||
# In physical order from top to bottom (see note below)
|
||||
ssf = {
|
||||
# Switches for Ethernet and OmniPath
|
||||
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
|
||||
switch-opa = { pos=41; size=1; };
|
||||
|
||||
# SSF login
|
||||
ssfhead = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="operations@bsc.es"; };
|
||||
|
||||
# Storage
|
||||
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
|
||||
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
|
||||
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
|
||||
|
||||
# Compute xeon
|
||||
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
|
||||
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
|
||||
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
|
||||
# Slot 32 empty
|
||||
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
|
||||
xeon06 = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
|
||||
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
|
||||
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
|
||||
|
||||
# 16 KNL nodes, 4 per chassis
|
||||
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
|
||||
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
|
||||
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
|
||||
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
|
||||
|
||||
# Slot 19 empty
|
||||
|
||||
# EPI (hw team, guessed order)
|
||||
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
|
||||
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
|
||||
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
|
||||
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
|
||||
|
||||
# These are old and decommissioned (off)
|
||||
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
|
||||
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
|
||||
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
|
||||
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
|
||||
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
|
||||
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
|
||||
};
|
||||
|
||||
bsc2218 = {
|
||||
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
|
||||
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
|
||||
};
|
||||
|
||||
upc = {
|
||||
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
|
||||
};
|
||||
|
||||
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
|
||||
# bottom. Example:
|
||||
#
|
||||
# | ... | - [pos+size] <--- Label in chassis
|
||||
# +--------+
|
||||
# | node | - [pos+1]
|
||||
# | 2U | - [pos]
|
||||
# +------- +
|
||||
# | ... | - [pos-1]
|
||||
#
|
||||
# NOTE: The board and sn refer to the FRU information (Board Product and
|
||||
# Board Serial) via `ipmitool fru print 0`.
|
||||
}
|
24
m/module/ceph.nix
Normal file
24
m/module/ceph.nix
Normal file
@ -0,0 +1,24 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
# Mounts the /ceph filesystem at boot
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
ceph-client
|
||||
fio # For benchmarks
|
||||
];
|
||||
|
||||
# We need the ceph module loaded as the mount.ceph binary fails to run the
|
||||
# modprobe command.
|
||||
boot.kernelModules = [ "ceph" ];
|
||||
|
||||
age.secrets.cephUser.file = ../../secrets/ceph-user.age;
|
||||
|
||||
fileSystems."/ceph" = {
|
||||
fsType = "ceph";
|
||||
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
||||
options = [
|
||||
"mon_addr=10.0.40.40"
|
||||
"secretfile=${config.age.secrets.cephUser.path}"
|
||||
];
|
||||
};
|
||||
}
|
3
m/module/debuginfod.nix
Normal file
3
m/module/debuginfod.nix
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
services.nixseparatedebuginfod.enable = true;
|
||||
}
|
3
m/module/emulation.nix
Normal file
3
m/module/emulation.nix
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
|
||||
}
|
10
m/module/hut-substituter.nix
Normal file
10
m/module/hut-substituter.nix
Normal file
@ -0,0 +1,10 @@
|
||||
{ config, ... }:
|
||||
{
|
||||
nix.settings =
|
||||
# Don't add hut as a cache to itself
|
||||
assert config.networking.hostName != "hut";
|
||||
{
|
||||
substituters = [ "http://hut/cache" ];
|
||||
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||
};
|
||||
}
|
24
m/module/jungle-users.nix
Normal file
24
m/module/jungle-users.nix
Normal file
@ -0,0 +1,24 @@
|
||||
{ config, lib, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
options = {
|
||||
users.jungleUsers = mkOption {
|
||||
type = types.attrsOf (types.anything // { check = (x: x ? "hosts"); });
|
||||
description = ''
|
||||
Same as users.users but with the extra `hosts` attribute, which controls
|
||||
access to the nodes by `networking.hostName`.
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
config = let
|
||||
allowedUser = host: userConf: builtins.elem host userConf.hosts;
|
||||
filterUsers = host: users: filterAttrs (n: v: allowedUser host v) users;
|
||||
removeHosts = users: mapAttrs (n: v: builtins.removeAttrs v [ "hosts" ]) users;
|
||||
currentHost = config.networking.hostName;
|
||||
in {
|
||||
users.users = removeHosts (filterUsers currentHost config.users.jungleUsers);
|
||||
};
|
||||
}
|
17
m/module/meteocat-exporter.nix
Normal file
17
m/module/meteocat-exporter.nix
Normal file
@ -0,0 +1,17 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
systemd.services."prometheus-meteocat-exporter" = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Restart = mkDefault "always";
|
||||
PrivateTmp = mkDefault true;
|
||||
WorkingDirectory = mkDefault "/tmp";
|
||||
DynamicUser = mkDefault true;
|
||||
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
|
||||
};
|
||||
};
|
||||
}
|
25
m/module/monitoring.nix
Normal file
25
m/module/monitoring.nix
Normal file
@ -0,0 +1,25 @@
|
||||
{ config, lib, ... }:
|
||||
|
||||
{
|
||||
# We need access to the devices to monitor the disk space
|
||||
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
|
||||
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
|
||||
|
||||
# Required to allow the smartctl exporter to read the nvme0 character device,
|
||||
# see the commit message on:
|
||||
# https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80
|
||||
services.udev.extraRules = ''
|
||||
SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk"
|
||||
'';
|
||||
|
||||
services.prometheus = {
|
||||
exporters = {
|
||||
node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
};
|
||||
smartctl.enable = true;
|
||||
};
|
||||
};
|
||||
}
|
126
m/module/slurm-client.nix
Normal file
126
m/module/slurm-client.nix
Normal file
@ -0,0 +1,126 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
let
|
||||
suspendProgram = pkgs.writeScript "suspend.sh" ''
|
||||
#!/usr/bin/env bash
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Shutting down host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||
done
|
||||
'';
|
||||
|
||||
# Script used as the SLURM ResumeProgram: powers on, via IPMI, every host
# named by the first argument. All output is appended to
# /var/log/power_save.log.
resumeProgram = pkgs.writeScript "resume.sh" ''
  #!/usr/bin/env bash
  exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
  set -x
  export "PATH=/run/current-system/sw/bin:$PATH"
  echo "$(date) Resume invoked $0 $*" >> /var/log/power_save.log
  # Quote the argument so an expression such as owl[1-2] reaches scontrol
  # verbatim instead of being treated as a shell glob pattern.
  hosts=$(scontrol show hostnames "$1")
  for host in $hosts; do
    echo Starting host: $host
    ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
  done
'';
|
||||
|
||||
in {
|
||||
systemd.services.slurmd.serviceConfig = {
|
||||
# Kill all processes in the control group on stop/restart. This will kill
|
||||
# all the jobs running, so ensure that we only upgrade when the nodes are
|
||||
# not in use. See:
|
||||
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||
KillMode = lib.mkForce "control-group";
|
||||
};
|
||||
|
||||
services.slurm = {
|
||||
client.enable = true;
|
||||
controlMachine = "hut";
|
||||
clusterName = "jungle";
|
||||
nodeName = [
|
||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
||||
];
|
||||
|
||||
partitionName = [
|
||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
];
|
||||
|
||||
# See slurm.conf(5) for more details about these options.
|
||||
extraConfig = ''
|
||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
||||
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
||||
# library in SLURM (--mpi=pmix). See more details here:
|
||||
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
||||
MpiDefault=pmix
|
||||
|
||||
# When a node reboots return that node to the slurm queue as soon as it
|
||||
# becomes operative again.
|
||||
ReturnToService=2
|
||||
|
||||
# Track all processes by using a cgroup
|
||||
ProctrackType=proctrack/cgroup
|
||||
|
||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||
TaskPlugin=task/affinity,task/cgroup
|
||||
|
||||
# Power off unused nodes until they are requested
|
||||
SuspendProgram=${suspendProgram}
|
||||
SuspendTimeout=60
|
||||
ResumeProgram=${resumeProgram}
|
||||
ResumeTimeout=300
|
||||
SuspendExcNodes=hut
|
||||
|
||||
# Turn the nodes off after 1 hour of inactivity
|
||||
SuspendTime=3600
|
||||
|
||||
# Reduce port range so we can allow only this range in the firewall
|
||||
SrunPortRange=60000-61000
|
||||
|
||||
# Use cores as consumable resources. In SLURM terms, a core may have
|
||||
# multiple hardware threads (or CPUs).
|
||||
SelectType=select/cons_tres
|
||||
|
||||
# Ignore memory constraints and only use unused cores to share a node with
|
||||
# other jobs.
|
||||
SelectTypeParameters=CR_Core
|
||||
|
||||
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
# This sets up the "extern" step into which ssh-launched processes will be
|
||||
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
|
||||
# when a task runs (srun) so we can ssh early.
|
||||
PrologFlags=Alloc,Contain,X11
|
||||
|
||||
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
|
||||
# adopted by the external step, similar to tasks running in regular steps
|
||||
# LaunchParameters=ulimit_pam_adopt
|
||||
SlurmdDebug=debug5
|
||||
#DebugFlags=Protocol,Cgroup
|
||||
'';
|
||||
|
||||
extraCgroupConfig = ''
|
||||
CgroupPlugin=cgroup/v2
|
||||
#ConstrainCores=yes
|
||||
'';
|
||||
};
|
||||
|
||||
# Place the slurm config in /etc as this will be required by PAM
|
||||
environment.etc.slurm.source = config.services.slurm.etcSlurm;
|
||||
|
||||
age.secrets.mungeKey = {
|
||||
file = ../../secrets/munge-key.age;
|
||||
owner = "munge";
|
||||
group = "munge";
|
||||
};
|
||||
|
||||
services.munge = {
|
||||
enable = true;
|
||||
password = config.age.secrets.mungeKey.path;
|
||||
};
|
||||
}
|
28
m/module/slurm-exporter.nix
Normal file
28
m/module/slurm-exporter.nix
Normal file
@ -0,0 +1,28 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
# See also: https://github.com/NixOS/nixpkgs/pull/112010
|
||||
# And: https://github.com/NixOS/nixpkgs/pull/115839
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
systemd.services."prometheus-slurm-exporter" = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Restart = mkDefault "always";
|
||||
PrivateTmp = mkDefault true;
|
||||
WorkingDirectory = mkDefault "/tmp";
|
||||
DynamicUser = mkDefault true;
|
||||
ExecStart = ''
|
||||
${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341"
|
||||
'';
|
||||
Environment = [
|
||||
"PATH=${pkgs.slurm}/bin"
|
||||
# We need to specify the slurm config to be able to talk to the slurmd
|
||||
# daemon.
|
||||
"SLURM_CONF=${config.services.slurm.etcSlurm}/slurm.conf"
|
||||
];
|
||||
};
|
||||
};
|
||||
}
|
8
m/module/slurm-firewall.nix
Normal file
8
m/module/slurm-firewall.nix
Normal file
@ -0,0 +1,8 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
networking.firewall = {
|
||||
# Required for PMIx in SLURM, we should find a better way
|
||||
allowedTCPPortRanges = [ { from=1024; to=65535; } ];
|
||||
};
|
||||
}
|
19
m/module/slurm-hut-nix-store.nix
Normal file
19
m/module/slurm-hut-nix-store.nix
Normal file
@ -0,0 +1,19 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
# Mount the hut nix store via NFS
|
||||
fileSystems."/mnt/hut-nix-store" = {
|
||||
device = "hut:/nix/store";
|
||||
fsType = "nfs";
|
||||
options = [ "ro" ];
|
||||
};
|
||||
|
||||
systemd.services.slurmd.serviceConfig = {
|
||||
# When running a job, bind the hut store in /nix/store so the paths are
|
||||
# available too.
|
||||
# FIXME: This doesn't keep the programs in /run/current-system/sw/bin
|
||||
# available in the store. Ideally they should be merged but the overlay FS
|
||||
# doesn't work when the underlying directories change.
|
||||
BindReadOnlyPaths = "/mnt/hut-nix-store:/nix/store";
|
||||
};
|
||||
}
|
9
m/module/ssh-hut-extern.nix
Normal file
9
m/module/ssh-hut-extern.nix
Normal file
@ -0,0 +1,9 @@
|
||||
{
|
||||
programs.ssh.extraConfig = ''
|
||||
Host ssfhead
|
||||
HostName ssflogin.bsc.es
|
||||
Host hut
|
||||
ProxyJump ssfhead
|
||||
HostName xeon07
|
||||
'';
|
||||
}
|
17
m/module/upc-qaire-exporter.nix
Normal file
17
m/module/upc-qaire-exporter.nix
Normal file
@ -0,0 +1,17 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
systemd.services."prometheus-upc-qaire-exporter" = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Restart = mkDefault "always";
|
||||
PrivateTmp = mkDefault true;
|
||||
WorkingDirectory = mkDefault "/tmp";
|
||||
DynamicUser = mkDefault true;
|
||||
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
|
||||
};
|
||||
};
|
||||
}
|
28
m/owl1/configuration.nix
Normal file
28
m/owl1/configuration.nix
Normal file
@ -0,0 +1,28 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../module/ceph.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
../module/slurm-firewall.nix
|
||||
../module/debuginfod.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c";
|
||||
|
||||
networking = {
|
||||
hostName = "owl1";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.1";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
interfaces.ibp5s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.1";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
}
|
29
m/owl2/configuration.nix
Normal file
29
m/owl2/configuration.nix
Normal file
@ -0,0 +1,29 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../module/ceph.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
../module/slurm-firewall.nix
|
||||
../module/debuginfod.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629";
|
||||
|
||||
networking = {
|
||||
hostName = "owl2";
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "10.0.40.2";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
# Watch out! The OmniPath device is not in the same place here:
|
||||
interfaces.ibp129s0.ipv4.addresses = [ {
|
||||
address = "10.0.42.2";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
}
|
100
m/raccoon/configuration.nix
Normal file
100
m/raccoon/configuration.nix
Normal file
@ -0,0 +1,100 @@
|
||||
{ config, pkgs, lib, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/base.nix
|
||||
../module/emulation.nix
|
||||
../module/debuginfod.nix
|
||||
../module/ssh-hut-extern.nix
|
||||
../eudy/kernel/perf.nix
|
||||
];
|
||||
|
||||
# Don't install Grub on the disk yet
|
||||
boot.loader.grub.device = "nodev";
|
||||
|
||||
# Enable serial console
|
||||
boot.kernelParams = [
|
||||
"console=tty1"
|
||||
"console=ttyS1,115200"
|
||||
];
|
||||
|
||||
networking = {
|
||||
hostName = "raccoon";
|
||||
# Only BSC DNSs seem to be reachable from the office VLAN
|
||||
nameservers = [ "84.88.52.35" "84.88.52.36" ];
|
||||
defaultGateway = "84.88.51.129";
|
||||
interfaces.eno0.ipv4.addresses = [ {
|
||||
address = "84.88.51.152";
|
||||
prefixLength = 25;
|
||||
} ];
|
||||
interfaces.enp5s0f1.ipv4.addresses = [ {
|
||||
address = "10.0.44.1";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
nat = {
|
||||
enable = true;
|
||||
internalInterfaces = [ "enp5s0f1" ];
|
||||
externalInterface = "eno0";
|
||||
};
|
||||
hosts = {
|
||||
"10.0.44.4" = [ "tent" ];
|
||||
};
|
||||
};
|
||||
|
||||
nix.settings = {
|
||||
substituters = [ "https://jungle.bsc.es/cache" ];
|
||||
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||
};
|
||||
|
||||
# Enable performance governor
|
||||
powerManagement.cpuFreqGovernor = "performance";
|
||||
|
||||
# Configure Nvidia driver to use with CUDA
|
||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||
hardware.graphics.enable = true;
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
nixpkgs.config.nvidia.acceptLicense = true;
|
||||
services.xserver.videoDrivers = [ "nvidia" ];
|
||||
|
||||
# Disable garbage collection for now
|
||||
nix.gc.automatic = lib.mkForce false;
|
||||
|
||||
services.openssh.settings.X11Forwarding = true;
|
||||
|
||||
services.prometheus.exporters.node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
|
||||
users.motd = ''
|
||||
⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⡎⢰⣧⠀⠀⠀⠁⠈⠛⢿⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣠⣴⡦⠶⠟⠓⠚⠻⡄⠀
|
||||
⠀⠀⠀⠀⠀⠀⣧⠀⣱⣀⣰⣧⠀⢀⠀⣘⣿⣿⣦⣶⣄⣠⡀⠀⠀⣀⣀⣤⣴⣄⣀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠿⠏⠁⠀⣀⣠⣶⣿⡶⣿⠀
|
||||
⠀⠀⠀⠀⠀⠀⣹⣆⠘⣿⣿⣿⣇⢸⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣾⣿⣿⣿⣿⣿⣿⣿⣿⣶⣶⣦⡀⣀⣤⣠⣤⡾⠋⠀⢀⣤⣶⣿⣿⣿⣿⣿⣿⣿⡀
|
||||
⠀⠀⠀⠀⠀⠀⠘⢿⡄⢼⣿⣿⣿⣿⣿⡟⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣵⣾⡾⠙⣋⣩⣽⣿⣿⣿⣿⢋⡼⠁
|
||||
⠀⠀⠀⠀⠀⠀⠀⠈⢻⣄⠸⢿⣿⣿⠿⠷⠀⠈⠀⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣾⣿⣿⣿⣿⣿⣿⠇⡼⠁⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⢾⣯⡀⠀⢼⡿⠀⠀⠀⢼⠿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⣿⡿⣿⣿⣿⠿⣿⣯⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⢋⡼⠁⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢻⡏⠠⣦⠁⠀⠀⠀⠀⠀⠟⠛⠛⣿⣿⣿⣿⣿⠿⠁⠀⠁⢿⠙⠁⠀⠛⠹⣿⣏⣾⣿⣿⣿⣿⣿⣿⣿⣿⠿⠃⣹⠁⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣘⣧⠀⠙⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⡿⡿⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⢹⣿⠿⢿⣿⣿⣿⣿⣿⠋⢀⡤⠛⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢹⡯⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣿⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠁⠀⢸⣿⣿⣿⠛⠉⠀⣰⠷⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠇⠀⠀⠀⠀⠀⢀⣿⡇⠀⠀⢻⣿⣿⠁⠀⠀⢠⣾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⠟⢿⣿⣄⡀⢸⣿⡀⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⠀⠀⠀⢰⣿⣿⡛⣿⣿⡄⢠⡺⠿⡍⠁⢀⣤⣿⣿⣿⠿⣷⣮⣉⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⠀⠀⠈⣧⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⢾⠉⠃⠀⣴⣿⣟⠻⣿⣿⣿⡇⢸⣿⣶⠀⢀⣾⣿⣿⣟⠿⣷⣾⣿⣿⣿⣿⣦⣤⣤⡤⠀⠀⠀⠀⠀⠁⠀⠀⠀⣼⠗⠀⠀⠀⠀
|
||||
⠀⠀⠐⢄⡀⠀⠀⠀⢘⡀⠀⢶⣾⣿⣿⣿⣿⡿⠋⠁⠈⠻⠉⠀⠚⠻⣿⣿⣿⣶⣾⣿⣿⣿⣿⣿⣿⣷⣬⣤⣶⣦⡀⣾⣶⣇⠀⠀⠈⢉⣷⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠈⠓⠶⢦⡽⠄⣈⣿⣿⣿⣿⣿⠏⠀⠀⠀⠀⠀⠀⠀⠀⠀⠹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡓⠙⣿⡟⠀⠀⠀⠈⠛⣷⣶⡄⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⢀⣬⠆⢠⣍⣛⠻⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣉⣀⡀⠀⠀⠈⠛⢿⣦⡀
|
||||
⠐⠒⠒⠶⠶⠶⢦⣬⣟⣥⣀⡉⠛⠻⠶⢁⣤⣾⣿⣿⣿⣷⡄⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣟⡛⠿⠭⠭⠭⠭⠭⠿⠿⠿⢿⣿⣟⠃⠀⠀⠀⠹⣟⠓
|
||||
⠀⣀⣠⠤⠤⢤⣤⣾⣤⡄⣉⣉⣙⣓⡂⣿⣿⣭⣹⣿⣿⣿⣿⡰⣂⣀⢀⠀⠻⣿⠛⠻⠟⠡⣶⣾⣿⣿⣿⣿⣿⣿⣿⡖⠒⠒⠒⠛⠷⢤⡀⢰⣴⣿⡆
|
||||
⠀⠀⠀⢀⣠⡴⠾⠟⠻⣟⡉⠉⠉⠉⢁⢿⣿⣿⣿⣿⣿⣿⡿⣱⣿⣭⡌⠤⠀⠀⠐⣶⣌⡻⣶⣭⡻⢿⣿⣿⣿⣿⣿⣯⣥⣤⣦⠀⠠⣴⣶⣶⣿⡟⢿
|
||||
⢀⠔⠊⠉⠀⠀⠀⠀⢸⣯⣤⠀⠀⠠⣼⣮⣟⣿⣿⣿⣻⣭⣾⣿⣿⣷⣶⣦⠶⣚⣾⣿⣿⣷⣜⣿⣿⣶⣝⢿⣿⣿⣿⣿⣷⣦⣄⣰⡄⠈⢿⣿⡿⣇⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠈⢡⢇⠀⠀⣠⣿⣿⣿⣯⣟⣛⣛⣛⣛⣛⣩⣭⣴⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣻⣿⣧⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠏⠀⢹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣍⣿⣿⣿⣿⡄⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣾⡁⢈⣾⣿⡿⠛⣛⣿⣿⣿⣿ DO YOU BRING FEEDS? ⣿⣿⣿⣿⣿⣿⡏⠈⠙⠈⠁⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⡿⠛⠉⣽⣿⣷⣾⡿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠌⠛⠉⠀⠁⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠹⠋⠀⢻⣿⣿⣿⣿⠿⢿⣿⣿⣿⣿⣿⣿⠿⣿⣿⣿⣿⠿⠛⠋⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠉⠁⠀⠀⠀⠀⠀⠈⠉⠉⠀⠀⠈⠋⠉⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
||||
'';
|
||||
}
|
45
m/tent/configuration.nix
Normal file
45
m/tent/configuration.nix
Normal file
@ -0,0 +1,45 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/xeon.nix
|
||||
../module/emulation.nix
|
||||
../module/debuginfod.nix
|
||||
../module/ssh-hut-extern.nix
|
||||
];
|
||||
|
||||
# Select this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
|
||||
|
||||
networking = {
|
||||
hostName = "tent";
|
||||
interfaces.eno1.ipv4.addresses = [
|
||||
{
|
||||
address = "10.0.44.4";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
|
||||
# Only BSC DNSs seem to be reachable from the office VLAN
|
||||
nameservers = [ "84.88.52.35" "84.88.52.36" ];
|
||||
defaultGateway = "10.0.44.1";
|
||||
};
|
||||
|
||||
nix.settings = {
|
||||
substituters = [ "https://jungle.bsc.es/cache" ];
|
||||
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||
};
|
||||
|
||||
services.prometheus.exporters.node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
|
||||
programs.ssh.extraConfig = ''
|
||||
Host hut
|
||||
ProxyJump ssfhead
|
||||
HostName xeon07
|
||||
'';
|
||||
}
|
1
nixos-config.nix
Normal file
1
nixos-config.nix
Normal file
@ -0,0 +1 @@
|
||||
(builtins.getFlake (toString ./.)).nixosConfigurations
|
@ -1,8 +0,0 @@
|
||||
self: super:
|
||||
with super.lib;
|
||||
let
|
||||
# Load the system config and get the `nixpkgs.overlays` option
|
||||
overlays = (import <nixpkgs/nixos> { }).config.nixpkgs.overlays;
|
||||
in
|
||||
# Apply all overlays to the input of the current "main" overlay
|
||||
foldl' (flip extends) (_: super) overlays self
|
25
pkgs/meteocat-exporter/default.nix
Normal file
25
pkgs/meteocat-exporter/default.nix
Normal file
@ -0,0 +1,25 @@
|
||||
{ python3Packages, lib }:
|
||||
|
||||
python3Packages.buildPythonApplication rec {
|
||||
pname = "meteocat-exporter";
|
||||
version = "1.0";
|
||||
|
||||
src = ./.;
|
||||
|
||||
doCheck = false;
|
||||
|
||||
build-system = with python3Packages; [
|
||||
setuptools
|
||||
];
|
||||
|
||||
dependencies = with python3Packages; [
|
||||
beautifulsoup4
|
||||
lxml
|
||||
prometheus-client
|
||||
];
|
||||
|
||||
meta = with lib; {
|
||||
description = "MeteoCat Prometheus Exporter";
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
}
|
54
pkgs/meteocat-exporter/meteocat-exporter
Normal file
54
pkgs/meteocat-exporter/meteocat-exporter
Normal file
@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import time
|
||||
from prometheus_client import start_http_server, Gauge
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib import request
|
||||
|
||||
# Configuration -------------------------------------------
|
||||
meteo_station = "X8" # Barcelona - Zona Universitària
|
||||
listening_port = 9929
|
||||
update_period = 60 * 5 # Each 5 min
|
||||
# ---------------------------------------------------------
|
||||
|
||||
metric_tmin = Gauge('meteocat_temp_min', 'Min temperature')
|
||||
metric_tmax = Gauge('meteocat_temp_max', 'Max temperature')
|
||||
metric_tavg = Gauge('meteocat_temp_avg', 'Average temperature')
|
||||
metric_srad = Gauge('meteocat_solar_radiation', 'Solar radiation')
|
||||
|
||||
def update(st):
    """Fetch the latest observation for station code `st` and update the gauges.

    Downloads the meteo.cat XEMA page for the station, takes the last row of
    the "tblperiode" table and publishes cells 1, 2, 3 and 10 of that row as
    the average, maximum and minimum temperature and the solar radiation
    gauges. If any of those cells is missing or is not a number, all four
    gauges are set to NaN.

    Network errors and an unexpected page structure are not handled here;
    they propagate to the caller.
    """
    url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
    # Close the response deterministically and bound the wait, so a stalled
    # connection cannot block the exporter forever.
    with request.urlopen(url, timeout=30) as response:
        data = response.read()

    soup = BeautifulSoup(data, 'lxml')
    table = soup.find("table", {"class" : "tblperiode"})
    rows = table.find_all('tr')
    row = rows[-1] # Take the last row

    # Cell 0 is the row header, followed by the data cells in page order.
    header = row.find('th')
    row_data = [header.text.strip()]
    row_data.extend(col.text for col in row.find_all('td'))

    try:
        # Sometimes it will return '(s/d)' and fail to parse
        tavg = float(row_data[1])
        tmax = float(row_data[2])
        tmin = float(row_data[3])
        srad = float(row_data[10])
    except (ValueError, IndexError):
        print("cannot parse row: {}".format(row))
        tavg = tmax = tmin = srad = float("nan")

    metric_tavg.set(tavg)
    metric_tmax.set(tmax)
    metric_tmin.set(tmin)
    metric_srad.set(srad)
|
||||
|
||||
if __name__ == '__main__':
    # Expose the metrics endpoint on localhost, then refresh the gauges
    # forever, sleeping update_period seconds between attempts.
    start_http_server(port=listening_port, addr="localhost")
    while True:
        try:
            update(meteo_station)
        except Exception as e:
            # Catch Exception rather than everything: a bare except would
            # also swallow KeyboardInterrupt and SystemExit. Report the cause
            # so failures can be diagnosed from the log.
            print("update failed: {}".format(e))
            # Do not keep exporting the previous sample as if it were fresh.
            for metric in (metric_tavg, metric_tmax, metric_tmin, metric_srad):
                metric.set(float("nan"))
        time.sleep(update_period)
|
11
pkgs/meteocat-exporter/setup.py
Normal file
11
pkgs/meteocat-exporter/setup.py
Normal file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Arguments handed to setuptools, collected first so each one can be
# commented on its own line.
setup_args = {
    'name': 'meteocat-exporter',
    'version': '1.0',
    # Modules that other scripts may import.
    'packages': find_packages(),
    # Files installed as executables.
    'scripts': ["meteocat-exporter"],
}

setup(**setup_args)
|
36
pkgs/mpich-fix-hwtopo.patch
Normal file
36
pkgs/mpich-fix-hwtopo.patch
Normal file
@ -0,0 +1,36 @@
|
||||
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
|
||||
index 33e88bc..ee3641c 100644
|
||||
--- a/src/util/mpir_hwtopo.c
|
||||
+++ b/src/util/mpir_hwtopo.c
|
||||
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
|
||||
#ifdef HAVE_HWLOC
|
||||
bindset = hwloc_bitmap_alloc();
|
||||
hwloc_topology_init(&hwloc_topology);
|
||||
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
|
||||
- if (xmlfile != NULL) {
|
||||
- int rc;
|
||||
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
|
||||
- if (rc == 0) {
|
||||
- /* To have hwloc still actually call OS-specific hooks, the
|
||||
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
|
||||
- * file is really the underlying system. */
|
||||
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
|
||||
- }
|
||||
- MPL_free(xmlfile);
|
||||
- }
|
||||
|
||||
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
|
||||
if (!hwloc_topology_load(hwloc_topology))
|
||||
|
||||
--- a/src/mpi/init/local_proc_attrs.c
|
||||
+++ b/src/mpi/init/local_proc_attrs.c
|
||||
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
|
||||
/* Set the number of tag bits. The device may override this value. */
|
||||
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
|
||||
|
||||
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
|
||||
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
|
||||
- MPL_free(requested_kinds);
|
||||
-
|
||||
return mpi_errno;
|
||||
}
|
59
pkgs/overlay.nix
Normal file
59
pkgs/overlay.nix
Normal file
@ -0,0 +1,59 @@
|
||||
# Package overlay: overrides mpich and slurm from the previous package set
# (`prev`) and adds the locally defined exporters. `final` is the resulting
# package set with this overlay applied.
final: prev:
{
  # Set MPICH as default
  mpi = final.mpich;

  # Configure the network for MPICH
  mpich = with final; let
    # pmix comes with the libraries in .out and headers in .dev
    pmixAll = symlinkJoin {
      name = "pmix-all";
      paths = [ pmix.dev pmix.out ];
    };
  in prev.mpich.overrideAttrs (old: {
    patches = (old.patches or []) ++ [
      # See https://github.com/pmodels/mpich/issues/6946
      ./mpich-fix-hwtopo.patch
    ];
    # The attribute must be `buildInputs` (plural). The previous spelling
    # `buildInput` is not an attribute stdenv reads, so libfabric and pmixAll
    # were never added to the build inputs.
    buildInputs = (old.buildInputs or []) ++ [
      libfabric
      pmixAll
    ];
    # NOTE: this replaces the configure flags of the original derivation
    # instead of extending them.
    configureFlags = [
      "--enable-shared"
      "--enable-sharedlib"
      "--with-pm=no"
      "--with-device=ch4:ofi"
      "--with-pmi=pmix"
      "--with-pmix=${pmixAll}"
      "--with-libfabric=${libfabric}"
      "--enable-g=log"
    ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
      "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
      "FCFLAGS=-fallow-argument-mismatch"
    ];
  });

  slurm = prev.slurm.overrideAttrs (old: {
    patches = (old.patches or []) ++ [
      # See https://bugs.schedmd.com/show_bug.cgi?id=19324
      ./slurm-rank-expansion.patch
    ];
    # Install also the pam_slurm_adopt library to restrict users from accessing
    # nodes with no job allocated.
    postBuild = (old.postBuild or "") + ''
      pushd contribs/pam_slurm_adopt
        make "PAM_DIR=$out/lib/security"
      popd
    '';
    postInstall = (old.postInstall or "") + ''
      pushd contribs/pam_slurm_adopt
        make "PAM_DIR=$out/lib/security" install
      popd
    '';
  });

  # Locally packaged Prometheus exporters.
  prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
  meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
  upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
}
|
22
pkgs/slurm-exporter.nix
Normal file
22
pkgs/slurm-exporter.nix
Normal file
@ -0,0 +1,22 @@
|
||||
# Builds the Prometheus SLURM exporter as a Go module from the upstream
# GitHub repository, using the version string as the git revision.
{ buildGoModule, fetchFromGitHub, lib }:

let
  pname = "prometheus-slurm-exporter";
  version = "0.20";
in
buildGoModule {
  inherit pname version;

  src = fetchFromGitHub {
    owner = "vpenso";
    repo = pname;
    rev = version;
    sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs=";
  };

  vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI=";

  # The check phase is disabled for this package.
  doCheck = false;

  meta = {
    description = "Prometheus SLURM Exporter";
    homepage = "https://github.com/vpenso/prometheus-slurm-exporter";
    platforms = lib.platforms.linux;
  };
}
|
11
pkgs/slurm-rank-expansion.patch
Normal file
11
pkgs/slurm-rank-expansion.patch
Normal file
@ -0,0 +1,11 @@
|
||||
--- a/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:05:24.815313882 +0100
|
||||
+++ b/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:09:53.936900823 +0100
|
||||
@@ -314,7 +314,7 @@ static void _dmdx_req(buf_t *buf, int no
|
||||
}
|
||||
|
||||
nsptr = pmixp_nspaces_local();
|
||||
- if (nsptr->ntasks <= rank) {
|
||||
+ if ((long) nsptr->ntasks <= (long) rank) {
|
||||
char *nodename = pmixp_info_job_host(nodeid);
|
||||
PMIXP_ERROR("Bad request from %s: nspace \"%s\" has only %d ranks, asked for %d",
|
||||
nodename, ns, nsptr->ntasks, rank);
|
24
pkgs/upc-qaire-exporter/default.nix
Normal file
24
pkgs/upc-qaire-exporter/default.nix
Normal file
@ -0,0 +1,24 @@
|
||||
# Packages the upc-qaire-exporter script found in this directory as a Python
# application built with setuptools.
{ python3Packages, lib }:

let
  inherit (python3Packages)
    buildPythonApplication
    prometheus-client
    requests
    setuptools;
in
buildPythonApplication {
  pname = "upc-qaire-exporter";
  version = "1.0";

  # The sources are the contents of this directory.
  src = ./.;

  # The check phase is disabled for this package.
  doCheck = false;

  build-system = [ setuptools ];

  dependencies = [
    prometheus-client
    requests
  ];

  meta = {
    description = "UPC Qaire Prometheus Exporter";
    platforms = lib.platforms.linux;
  };
}
|
11
pkgs/upc-qaire-exporter/setup.py
Normal file
11
pkgs/upc-qaire-exporter/setup.py
Normal file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Arguments handed to setuptools, collected first so each one can be
# commented on its own line.
setup_args = {
    'name': 'upc-qaire-exporter',
    'version': '1.0',
    # Modules that other scripts may import.
    'packages': find_packages(),
    # Files installed as executables.
    'scripts': ["upc-qaire-exporter"],
}

setup(**setup_args)
|
74
pkgs/upc-qaire-exporter/upc-qaire-exporter
Normal file
74
pkgs/upc-qaire-exporter/upc-qaire-exporter
Normal file
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import time
|
||||
from prometheus_client import start_http_server, Gauge
|
||||
import requests, json
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Settings -------------------------------------------------
# Port given to start_http_server for the metrics endpoint.
listening_port = 9928
# Seconds slept between two consecutive measurements (5 minutes).
update_period = 5 * 60
# ----------------------------------------------------------

# Gauge that receives the value returned by measure().
metric_temp = Gauge('upc_c6_s302_temp', 'UPC C6 S302 temperature sensor')
|
||||
|
||||
def genparams():
    """Build the form fields sent with the data request.

    Returns a dict holding the fixed query fields, the current local date
    (formatted as dd/mm/YYYY) as both 'fromDate' and 'toDate', and one 'OPEN'
    entry for every 'week.days[i].hours[j].value' key with i in 0..6 and j in
    0..47.
    """
    today = datetime.now().strftime('%d/%m/%Y')

    params = {
        'topic': 'TEMPERATURE',
        'shift_dates_to': '',
        'datapoints': 301,
        'devicesAndColors': '1148418@@@#40ACB6',
        'fromDate': today,
        'toDate': today,
        'serviceFrequency': 'NONE',
    }

    # One field per (day, slot) pair, all of them set to 'OPEN'.
    params.update(
        ('week.days[{}].hours[{}].value'.format(day, slot), 'OPEN')
        for day in range(7)
        for slot in range(48)
    )

    return params
|
||||
|
||||
def measure():
    """Read the latest temperature value from the data endpoint.

    A session is first opened against https://upc.edu/sirena so that the
    JSESSIONID cookie is available, then the data endpoint is queried with the
    fields built by genparams(). The '1148418-Temperatura' value of the last
    element of the returned chart list is the result.

    Returns:
        The temperature value, or NaN (after printing the reason) when either
        HTTP request does not answer 200 or no JSESSIONID cookie was set.

    Connection errors, timeouts and malformed responses are not handled here
    and propagate to the caller.
    """
    nan = float("nan")

    # Close the session when done so its connections are released, and bound
    # every request so a stalled server cannot block the loop forever.
    with requests.Session() as s:
        # First we need to load session
        r = s.get("https://upc.edu/sirena", timeout=30)
        if r.status_code != 200:
            print("bad HTTP status code on new session: {}".format(r.status_code))
            return nan

        if s.cookies.get("JSESSIONID") is None:
            print("cannot get JSESSIONID")
            return nan

        # Now we can pull the data
        url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
        r = s.post(url, data=genparams(), timeout=30)

        if r.status_code != 200:
            print("bad HTTP status code on data: {}".format(r.status_code))
            return nan

        j = json.loads(r.content)

    # Just take the last one
    last = j['data']['chartElementList'][-1]
    return last['values']['1148418-Temperatura']
|
||||
|
||||
if __name__ == '__main__':
    # Expose the metrics endpoint on localhost, then refresh the gauge
    # forever, sleeping update_period seconds between attempts.
    start_http_server(port=listening_port, addr="localhost")
    while True:
        try:
            metric_temp.set(measure())
        except Exception as e:
            # Catch Exception rather than everything: a bare except would
            # also swallow KeyboardInterrupt and SystemExit. Report the cause
            # so failures can be diagnosed from the log.
            print("measure failed: {}".format(e))
            metric_temp.set(float("nan"))

        time.sleep(update_period)
|
16
rebuild.sh
Executable file
16
rebuild.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/bin/sh -ex
# Runs `nixos-rebuild switch` from the flake in the current directory, first
# for the local machine and then for each remote node. Refuses to run unless
# invoked as root on the machine named "hut".

if [ "$(id -u)" != 0 ]; then
  # Errors go to stderr, like the hostname check below.
  >&2 echo "Needs root permissions"
  exit 1
fi

if [ "$(hostname)" != "hut" ]; then
  >&2 echo "must run from machine hut, not $(hostname)"
  exit 1
fi

# Update all nodes
nixos-rebuild switch --flake .
for node in owl1 owl2; do
  nixos-rebuild switch --flake ".#$node" --target-host "$node"
done
|
BIN
secrets/ceph-user.age
Normal file
BIN
secrets/ceph-user.age
Normal file
Binary file not shown.
BIN
secrets/gitea-runner-token.age
Normal file
BIN
secrets/gitea-runner-token.age
Normal file
Binary file not shown.
10
secrets/gitlab-bsc-docker-token.age
Normal file
10
secrets/gitlab-bsc-docker-token.age
Normal file
@ -0,0 +1,10 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg XPOFoZqY+AnKC77jrgNqAm1ADphurfuhO4NRrfiuUDc
|
||||
iCfMMpGHyaYHGy6ci8sqjUtcPeteLlyvLGEF79VPOEc
|
||||
-> ssh-ed25519 CAWG4Q 6OsGrnM+/c5lTN81Rvp166K+ygmSIFeSYzXxYg25KGE
|
||||
Av1zTw2zK4Gufzti9kQaye7C362GCiDRRHzCqBLR33g
|
||||
-> ssh-ed25519 MSF3dg 8CHqJ7mEDvjvqbmF+eE6Em1Wi6eHAzEUpiExC1gm7S0
|
||||
bdwzYHw3RAbdHq+RsiFUP++sQ586VUlSnAzAOhiQUjI
|
||||
--- gA5XSUfjUBol938sC5DbUf8PvQUIr2pNkS2nL95OF9c
|
||||
ý‘îEa1G7·ŠÝ©[R¥€\{~$GoðcQœwKP&²»Üw«›Ç6]
|
||||
¤ÙÑ£ó”€À÷ç^zôÌ„ 1k·í‘Üìì<C3AC>Y»<59>2Íp§2¼ÜKîÒäŒnokî°ž¹/X¾Âpt''±Ú$0co=“Ø
|
9
secrets/gitlab-runner-docker-token.age
Normal file
9
secrets/gitlab-runner-docker-token.age
Normal file
@ -0,0 +1,9 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg pXNTB/ailRwSEJG1pXvrzzpz5HqkDZdWVWnOH7JGeQ4
|
||||
NzA+2fxfkNRy/u+Zq96A02K1Vxy0ETYZjMkDVTKyCY8
|
||||
-> ssh-ed25519 CAWG4Q 7CLJWn+EAxoWDduXaOSrHaBFHQ4GIpYP/62FFTj3ZTI
|
||||
vSYV1pQg2qI2ngCzM0nCZAnqdz1tbT4hM5m+/TyGU2c
|
||||
-> ssh-ed25519 MSF3dg Akmp4NcZcDuaYHta/Vej6zulNSrAOCd5lmSV+OiBGC4
|
||||
qTxqVzTyywur+GjtUQdbaIUdH1fqCqPe6qPf8iHRa4w
|
||||
--- uCKNqD1TmZZThOzlpsecBKx/k+noIWhCVMr/pzNwBr8
|
||||
r'ÖÆ‹s4í˺ÐAÄ¥„PíLù‘7` â—š)HŽ“-ú0ÓAHŽ5ÇÁ€ñL®QeÍÌH2bÒƒBÞ²óCJG¯"-SÝÊ\åÎþH<nðV
P³á~øtÃ=vçqÂ\šNA0£Ñ:
|
9
secrets/gitlab-runner-shell-token.age
Normal file
9
secrets/gitlab-runner-shell-token.age
Normal file
@ -0,0 +1,9 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg s6iI9f25xulF4KXt+XY07kXXPKxXo7f2Ql/OTHN55Hk
|
||||
WO4Fd2H9c+HL3+XhUF3BmEZVILlcchGxSrSmL2OEdGw
|
||||
-> ssh-ed25519 CAWG4Q TBkdpx8k8K1NvW3wcvaF7omKFwEJ2DxWJp3tIOTjwCA
|
||||
LcYgWRix23AQnw0OQ7f8+8S3J84CHUElX1vKZSETiLE
|
||||
-> ssh-ed25519 MSF3dg WzrF8kjTP7BXXDjmUp7kPCKguthAW12RPo6Vy2RMmh4
|
||||
8C3mT9ktudCTANDxhyNszUkbeDG6X4wOJdx825++dYM
|
||||
--- /w3YQ2UeTi67H1JR0GsdPz2KoLN2Y7BIZfFY+//AWjY
|
||||
ŽÓ£-`PÝ@þ€Þ„‹Œ³)9®9l™ð‹ØZfƒÍV?I>Î<>Ÿwé‰<C3A9>¡z40 ³2{i@…ZÁîx¦±AHná%Ü×Ïʤÿ/W¶®Ä”¢løç–å}Æ&–ì–¶Ä(–ÂÐKªóÙS±Åo·z¨=Ÿd
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user