Compare commits: 9eeebd05bf...share-file (257 commits)

| SHA1 | Author | Date | |
|---|---|---|---|
| 9b1391a9f6 | |||
| c8ca5adf84 | |||
| 43e4c60dd5 | |||
| f5d6f32ca8 | |||
| 8fccb40a7a | |||
| 4bd1648074 | |||
| 15b114ffd6 | |||
| dd6d8c9735 | |||
| e15a3867d4 | |||
| 5cad208de6 | |||
| c8687f7e45 | |||
| d988ef2eff | |||
| b07929eab3 | |||
| b3e397eb4c | |||
| 5ad2c683ed | |||
| 1f06f0fa0c | |||
| 8ca1d84844 | |||
| 998f599be3 | |||
| fcfc6ac149 | |||
| 6e87130166 | |||
| 06f9e6ac6b | |||
| da07aedce2 | |||
| 61427a8bf9 | |||
| 958ad1f025 | |||
| 1c5f3a856f | |||
| 4e2b80defd | |||
| 1c8efd0877 | |||
| 4c5e85031b | |||
| 5688823fcc | |||
| 72faf8365b | |||
| 0e22d6def8 | |||
| 22cc1d33f7 | |||
| 15085c8a05 | |||
| 06748dac1d | |||
| 63851306ac | |||
| 2bdc793c8c | |||
| 85d1c5e34c | |||
| e6b7af5272 | |||
| c0ae8770bc | |||
| 5b51e8947f | |||
| db2c6f7e45 | |||
| 8e8f9e7adb | |||
| d2adc3a6d3 | |||
| 76cd9ea47f | |||
| 2f851bc216 | |||
| 834d3187e5 | |||
| 49be0f208c | |||
| fb23b41dae | |||
| 005a67deaf | |||
| f8097cb5cb | |||
| ff792f5f48 | |||
| 5c48b43ae0 | |||
| b299ead00b | |||
| a92432cf5a | |||
| 82f5d828c2 | |||
| 35a94a9b02 | |||
| b6bd31e159 | |||
| 1d4badda5b | |||
| bd5214a3b9 | |||
| c32f6dea97 | |||
| dd341902fc | |||
| 190e273112 | |||
| 268807d1d0 | |||
| 2953080fb8 | |||
| 9871517be2 | |||
| 736eacaac5 | |||
| 0e66aad099 | |||
| 67a4905a0a | |||
| d52d22e0db | |||
| 42920c2521 | |||
| 4acd35e036 | |||
| 621d20db3a | |||
| 0926f6ec1f | |||
| 61646cb3bd | |||
| c0066c4744 | |||
| ffd0593f51 | |||
| f49ae0773e | |||
| 8fa3fccecb | |||
| 9ee7111453 | |||
| 8de3d2b149 | |||
| bc62e28ca3 | |||
| d612a5453c | |||
| 653d411b9e | |||
| 51c57dbc41 | |||
| 33cd40160e | |||
| a1e8cfea47 | |||
| 5d72ee3da3 | |||
| fdc6445d47 | |||
| e88805947e | |||
| aaefddc44a | |||
| d9d249411d | |||
| c07f75c6bb | |||
| 8d449ba20c | |||
| 10ca572aec | |||
| 75b0f48715 | |||
| 19a451db77 | |||
| ec9be9bb62 | |||
| 7ddd1977f3 | |||
| 7050c505b5 | |||
| 033a1fe97b | |||
| 77cb3c494e | |||
| 6db5772ac4 | |||
| 3e347e673c | |||
| dca274d020 | |||
| c33909f32f | |||
| 64e856e8b9 | |||
| 02f40a8217 | |||
| 77d43b6da9 | |||
| ab55aac5ff | |||
| 9b5bfbb7a3 | |||
| a69a71d1b0 | |||
| 98374bd303 | |||
| 3b6be8a2fc | |||
| 2bb366b9ac | |||
| 2d16709648 | |||
| 9344daa31c | |||
| 80c98041b5 | |||
| 3418e57907 | |||
| 6848b58e39 | |||
| 13a70411aa | |||
| f9c77b433a | |||
| 9d487845f6 | |||
| 3c99c2a662 | |||
| 7d09108c9f | |||
| 0f0a861896 | |||
| beb0d5940e | |||
| 70321ce237 | |||
| 5bd1d67333 | |||
| fad9df61e1 | |||
| d2a80c8c18 | |||
| 599613d139 | |||
| ac4fa9abd4 | |||
| cb3a7b19f7 | |||
| f5d6bf627b | |||
| f1ce815edd | |||
| a2075cfd65 | |||
| 8f1f6f92a8 | |||
| 3416416864 | |||
| 815888fb07 | |||
| 029d9cb1db | |||
| 95fa67ede1 | |||
| a19347161f | |||
| 58c1cc1f7c | |||
| b06399dc70 | |||
| 077eece6b9 | |||
| b3ef53de51 | |||
| e0852ee89b | |||
| dfffc0bdce | |||
| 8257c245b1 | |||
| cd5853cf53 | |||
| b677b827d4 | |||
| b1d5185cca | |||
| a7e66e2246 | |||
| 480c97e952 | |||
| f8fb5fa4ff | |||
| acf9b71f04 | |||
| bf692e6e4e | |||
| c242b65e47 | |||
| 55d6c17776 | |||
| 14b173f67e | |||
| b9001cdf7d | |||
| f892d43b47 | |||
| d9e9ee6e3a | |||
| 79adbe76a8 | |||
| 66fb848ba8 | |||
| 40b1a8f0df | |||
| a0b9d10b14 | |||
| 4c309dea2f | |||
| b3a397eee4 | |||
| 7c1fe1455b | |||
| 2d4b178895 | |||
| 4dd25f2f89 | |||
| 6dcd9d8144 | |||
| 31be81d2b1 | |||
| 826cfdf43f | |||
| a1f258c5ce | |||
| 1c1d3f3231 | |||
| 623d46c03f | |||
| 518a4d6af3 | |||
| 60077948d6 | |||
| c76bfa7f86 | |||
| 6c10933e80 | |||
| 6402605b1f | |||
| 1724535495 | |||
| 5b41670f36 | |||
| ab04855382 | |||
| 684d5e41c5 | |||
| 316ea18e24 | |||
| c916157fcc | |||
| 4e9409db10 | |||
| 94320d9256 | |||
| 9f5941c2be | |||
| fba0f7b739 | |||
| 2e95281af5 | |||
| f4ac9f3186 | |||
| f787343f29 | |||
| 70304d26ff | |||
| 76c10ec22e | |||
| 011e8c2bf8 | |||
| c1f138a9c1 | |||
| 1552eeca12 | |||
| 8769f3d418 | |||
| a4c254fcd6 | |||
| 24fb1846d2 | |||
| 5e77d0b86c | |||
| 494fda126c | |||
| 5cfa2f9611 | |||
| 9539a24bdb | |||
| 98c4d924dd | |||
| 7aae967c65 | |||
| 49f7edddac | |||
| 2f055d9fc5 | |||
| 108abffd2a | |||
| 4c19ad66e3 | |||
| 19c01aeb1d | |||
| fc90b40310 | |||
| 81de0effb1 | |||
| 5ce93ff85a | |||
| c020b9f5d6 | |||
| f47734b524 | |||
| ca3a7d98f5 | |||
| 0d5609ecc2 | |||
| 818edccb34 | |||
| 2815f5bcfd | |||
| c1bbbd7793 | |||
| aa1dd14b62 | |||
| 399103a9b4 | |||
| 74639d3ece | |||
| 613a76ac29 | |||
| c3ea8864bb | |||
| 919f211536 | |||
| 141d77e2b6 | |||
| 44fcb97ec7 | |||
| 543983e9f3 | |||
| 95bbeeb646 | |||
| de2af79810 | |||
| b9aff1dba5 | |||
| 7da979bed2 | |||
| cfe37640ea | |||
| 096e407571 | |||
| ae31b546e7 | |||
| c3a2766bb7 | |||
| b568bb36d4 | |||
| 55f784e6b7 | |||
| dfab84b0ba | |||
| 8f66ba824a | |||
| 79bd4398f3 | |||
| b44afdaaa1 | |||
| 9528fab3ef | |||
| 7e82885d84 | |||
| 57ed0cf319 | |||
| b043ee3b1d | |||
| 9e3bdaabb6 | |||
| 77f72ac939 | |||
| fa25a68571 | |||
| ea0f406849 | | | |
| 9df6be1b6b | | | |
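To inspect this range locally, something along these lines should work (a sketch, assuming a remote named `origin` that carries both the base commit and the `share-file` branch):

```sh
# Fetch the branch being compared
git fetch origin share-file

# The commit list above (base..branch)
git log --oneline 9eeebd05bf..origin/share-file

# The file changes shown below (three-dot form diffs against the merge base)
git diff 9eeebd05bf...origin/share-file
```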
@@ -1,20 +0,0 @@
name: CI
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:all:
    runs-on: native
    steps:
      - uses: https://gitea.com/ScMi1/checkout@v1.4
      - run: nix build -L --no-link --print-out-paths .#bsc.ci.all
  build:cross:
    runs-on: native
    steps:
      - uses: https://gitea.com/ScMi1/checkout@v1.4
      - run: nix build -L --no-link --print-out-paths .#bsc.ci.cross
3 .gitignore (vendored)
@@ -1,3 +1,2 @@
**.swp
*.swp
/result
/misc
@@ -1,6 +0,0 @@
build:bsc-ci.all:
  stage: build
  tags:
    - nix
  script:
    - nix build -L --no-link --print-out-paths .#bsc-ci.all
21 COPYING
@@ -1,21 +0,0 @@
Copyright (c) 2020-2025 Barcelona Supercomputing Center
Copyright (c) 2003-2020 Eelco Dolstra and the Nixpkgs/NixOS contributors

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,9 +0,0 @@
# Jungle

This repository provides two components that can be used independently:

- A Nix overlay with packages used at BSC (formerly known as bscpkgs). Access
  them directly with `nix shell .#<pkgname>`.

- NixOS configurations for jungle machines. Use `nixos-rebuild switch --flake .`
  to upgrade the current machine.
19 default.nix
@@ -1,19 +0,0 @@
let
  bscOverlay = import ./overlay.nix;

  # read flake.lock and determine revision from there
  lock = builtins.fromJSON (builtins.readFile ./flake.lock);
  inherit (lock.nodes.nixpkgs.locked) rev narHash;
  fetchedNixpkgs = builtins.fetchTarball {
    url = "https://github.com/NixOS/nixpkgs/archive/${rev}.tar.gz";
    sha256 = narHash;
  };
in
{ overlays ? [ ]
, nixpkgs ? fetchedNixpkgs
, ...
}@attrs:
import nixpkgs (
  (builtins.removeAttrs attrs [ "overlays" "nixpkgs" ]) //
  { overlays = [ bscOverlay ] ++ overlays; }
)
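Since this default.nix simply returns nixpkgs with the BSC overlay applied, it can be used without flakes; a minimal sketch (the package attribute name is a placeholder and depends on what the overlay defines):

```sh
# Build an overlay package through the non-flake entry point
nix-build -A <pkgname>

# Load the package set interactively
nix repl --file ./default.nix
```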
BIN doc/Intel_Server_Board_S2600WF_TPS_2_6.pdf (Normal file; binary file not shown)
BIN doc/R1000WF_SystemIntegration_and_ServiceGuide_Rev2_4.pdf (Normal file; binary file not shown)
BIN doc/SEL_TroubleshootingGuide.pdf (Normal file; binary file not shown)
BIN doc/bsc-ssf.pdf (Normal file; binary file not shown)
46 doc/trim.sh
@@ -1,46 +0,0 @@
#!/bin/sh

# Trims the jungle repository by moving the website to its own repository and
# removing it from jungle. It also removes big pdf files and kernel
# configurations so the jungle repository is small.

set -e

if [ -e oldjungle -o -e newjungle -o -e website ]; then
    echo "remove oldjungle/, newjungle/ and website/ first"
    exit 1
fi

# Clone the old jungle repo
git clone gitea@tent:rarias/jungle.git oldjungle

# First split the website into a new repository
mkdir website && git -C website init -b master
git-filter-repo \
    --path web \
    --subdirectory-filter web \
    --source oldjungle \
    --target website

# Then remove the website, pdf files and big kernel configs
mkdir newjungle && git -C newjungle init -b master
git-filter-repo \
    --invert-paths \
    --path web \
    --path-glob 'doc*.pdf' \
    --path-glob '**/kernel/configs/lockdep' \
    --path-glob '**/kernel/configs/defconfig' \
    --source oldjungle \
    --target newjungle

set -x

du -sh oldjungle newjungle website
# 57M  oldjungle
# 2,3M newjungle
# 6,4M website

du -sh --exclude=.git oldjungle newjungle website
# 30M  oldjungle
# 700K newjungle
# 3,5M website
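Note that git-filter-repo is a separate tool, not part of stock git; a quick way to get it and sanity-check the split above (assuming the oldjungle/newjungle directories from the script):

```sh
# Install the filter tool used by trim.sh
pip install git-filter-repo

# Compare commit counts before and after the split
git -C oldjungle rev-list --count master
git -C newjungle rev-list --count master
```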
47 flake.lock (generated)
@@ -10,11 +10,11 @@
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1750173260,
|
||||
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
|
||||
"lastModified": 1720546205,
|
||||
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
|
||||
"owner": "ryantm",
|
||||
"repo": "agenix",
|
||||
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
|
||||
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -23,6 +23,26 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"bscpkgs": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1713974364,
|
||||
"narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "de89197a4a7b162db7df9d41c9d07759d87c5709",
|
||||
"revCount": 937,
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
},
|
||||
"original": {
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
}
|
||||
},
|
||||
"darwin": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
@@ -31,11 +51,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1744478979,
|
||||
"narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=",
|
||||
"lastModified": 1700795494,
|
||||
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
|
||||
"owner": "lnl7",
|
||||
"repo": "nix-darwin",
|
||||
"rev": "43975d782b418ebf4969e9ccba82466728c2851b",
|
||||
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -53,11 +73,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1745494811,
|
||||
"narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=",
|
||||
"lastModified": 1703113217,
|
||||
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be",
|
||||
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -68,16 +88,16 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1752436162,
|
||||
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
|
||||
"lastModified": 1720957393,
|
||||
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
|
||||
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-25.05",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
@@ -85,6 +105,7 @@
|
||||
"root": {
|
||||
"inputs": {
|
||||
"agenix": "agenix",
|
||||
"bscpkgs": "bscpkgs",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
},
|
||||
|
||||
33 flake.nix
@@ -1,29 +1,23 @@
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    agenix.url = "github:ryantm/agenix";
    agenix.inputs.nixpkgs.follows = "nixpkgs";
    bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
    bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
  };

  outputs = { self, nixpkgs, agenix, ... }:
  outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
  let
    mkConf = name: nixpkgs.lib.nixosSystem {
      system = "x86_64-linux";
      specialArgs = { inherit nixpkgs agenix; theFlake = self; };
      specialArgs = { inherit nixpkgs bscpkgs agenix; theFlake = self; };
      modules = [ "${self.outPath}/m/${name}/configuration.nix" ];
    };
    # For now we only support x86
    system = "x86_64-linux";
    pkgs = import nixpkgs {
      inherit system;
      overlays = [ self.overlays.default ];
      config.allowUnfree = true;
    };
  in
  {
    nixosConfigurations = {
      hut = mkConf "hut";
      tent = mkConf "tent";
      owl1 = mkConf "owl1";
      owl2 = mkConf "owl2";
      eudy = mkConf "eudy";
@@ -31,20 +25,11 @@ in
      bay = mkConf "bay";
      lake2 = mkConf "lake2";
      raccoon = mkConf "raccoon";
      fox = mkConf "fox";
      apex = mkConf "apex";
      weasel = mkConf "weasel";
    };

    bscOverlay = import ./overlay.nix;
    overlays.default = self.bscOverlay;

    # full nixpkgs with our overlay applied
    legacyPackages.${system} = pkgs;

    hydraJobs = self.legacyPackages.${system}.bsc.hydraJobs;

    # propagate nixpkgs lib, so we can do bscpkgs.lib
    inherit (nixpkgs) lib;
    packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
      bscpkgs = bscpkgs.packages.x86_64-linux;
      nixpkgs = nixpkgs.legacyPackages.x86_64-linux;
    };
  };
}
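As the README above notes, this flake is consumed roughly like so (a sketch; the host name and package attribute must exist in the flake outputs):

```sh
# Rebuild a machine from its NixOS configuration, e.g. the hut host
nixos-rebuild switch --flake .#hut

# Enter an environment with a package from the BSC overlay
nix shell .#<pkgname>
```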
32 keys.nix
@@ -2,36 +2,28 @@
|
||||
# here all the public keys
|
||||
rec {
|
||||
hosts = {
|
||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
|
||||
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
|
||||
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
|
||||
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
|
||||
raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon";
|
||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||
};
|
||||
|
||||
hostGroup = with hosts; rec {
|
||||
compute = [ owl1 owl2 fox raccoon ];
|
||||
playground = [ eudy koro weasel ];
|
||||
compute = [ owl1 owl2 ];
|
||||
playground = [ eudy koro ];
|
||||
storage = [ bay lake2 ];
|
||||
monitor = [ hut ];
|
||||
login = [ apex ];
|
||||
|
||||
system = storage ++ monitor ++ login;
|
||||
system = storage ++ monitor;
|
||||
safe = system ++ compute;
|
||||
all = safe ++ playground;
|
||||
};
|
||||
|
||||
admins = {
|
||||
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
|
||||
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
|
||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
||||
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/xeon.nix
|
||||
../common/ssf/hosts.nix
|
||||
../module/ceph.nix
|
||||
../module/hut-substituter.nix
|
||||
../module/slurm-server.nix
|
||||
./nfs.nix
|
||||
./wireguard.nix
|
||||
];
|
||||
|
||||
# Don't install grub MBR for now
|
||||
boot.loader.grub.device = "nodev";
|
||||
|
||||
boot.initrd.kernelModules = [
|
||||
"megaraid_sas" # For HW RAID
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
storcli # To manage HW RAID
|
||||
];
|
||||
|
||||
fileSystems."/home" = {
|
||||
device = "/dev/disk/by-label/home";
|
||||
fsType = "ext4";
|
||||
};
|
||||
|
||||
# No swap, there is plenty of RAM
|
||||
swapDevices = lib.mkForce [];
|
||||
|
||||
networking = {
|
||||
hostName = "apex";
|
||||
defaultGateway = "84.88.53.233";
|
||||
nameservers = [ "8.8.8.8" ];
|
||||
|
||||
# Public facing interface
|
||||
interfaces.eno1.ipv4.addresses = [ {
|
||||
address = "84.88.53.236";
|
||||
prefixLength = 29;
|
||||
} ];
|
||||
|
||||
# Internal LAN to our Ethernet switch
|
||||
interfaces.eno2.ipv4.addresses = [ {
|
||||
address = "10.0.40.30";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
|
||||
# Infiniband over Omnipath switch (disconnected for now)
|
||||
# interfaces.ibp5s0 = {};
|
||||
|
||||
nat = {
|
||||
enable = true;
|
||||
internalInterfaces = [ "eno2" ];
|
||||
externalInterface = "eno1";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall = {
|
||||
extraCommands = ''
|
||||
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
|
||||
# logs. Insert as first position so we also protect SSH.
|
||||
iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
|
||||
# Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
|
||||
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
|
||||
'';
|
||||
};
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
services.nfs.server = {
|
||||
enable = true;
|
||||
lockdPort = 4001;
|
||||
mountdPort = 4002;
|
||||
statdPort = 4000;
|
||||
exports = ''
|
||||
/home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
|
||||
/home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
|
||||
'';
|
||||
};
|
||||
networking.firewall = {
|
||||
# Check with `rpcinfo -p`
|
||||
extraCommands = ''
|
||||
# Accept NFS traffic from compute nodes but not from the outside
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
|
||||
# Same but UDP
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
|
||||
|
||||
# Accept NFS traffic from wg0
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
|
||||
# Same but UDP
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
|
||||
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
{ config, ... }:
|
||||
|
||||
{
|
||||
networking.firewall = {
|
||||
allowedUDPPorts = [ 666 ];
|
||||
};
|
||||
|
||||
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
|
||||
|
||||
# Enable WireGuard
|
||||
networking.wireguard.enable = true;
|
||||
networking.wireguard.interfaces = {
|
||||
# "wg0" is the network interface name. You can name the interface arbitrarily.
|
||||
wg0 = {
|
||||
ips = [ "10.106.0.30/24" ];
|
||||
listenPort = 666;
|
||||
privateKeyFile = config.age.secrets.wgApex.path;
|
||||
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
|
||||
peers = [
|
||||
{
|
||||
name = "fox";
|
||||
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
|
||||
allowedIPs = [ "10.106.0.1/32" ];
|
||||
endpoint = "fox.ac.upc.edu:666";
|
||||
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
|
||||
persistentKeepalive = 25;
|
||||
}
|
||||
{
|
||||
name = "raccoon";
|
||||
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
networking.hosts = {
|
||||
"10.106.0.1" = [ "fox" ];
|
||||
"10.106.0.236" = [ "raccoon" ];
|
||||
"10.0.44.4" = [ "tent" ];
|
||||
};
|
||||
}
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../module/hut-substituter.nix
|
||||
../common/xeon.nix
|
||||
../module/monitoring.nix
|
||||
];
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
# Includes the basic configuration for an Intel server.
|
||||
imports = [
|
||||
./base/agenix.nix
|
||||
./base/always-power-on.nix
|
||||
./base/august-shutdown.nix
|
||||
./base/boot.nix
|
||||
./base/env.nix
|
||||
@@ -11,7 +10,6 @@
|
||||
./base/hw.nix
|
||||
./base/net.nix
|
||||
./base/nix.nix
|
||||
./base/sys-devices.nix
|
||||
./base/ntp.nix
|
||||
./base/rev.nix
|
||||
./base/ssh.nix
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
imports = [
|
||||
../../module/power-policy.nix
|
||||
];
|
||||
|
||||
# Turn on as soon as we have power
|
||||
power.policy = "always-on";
|
||||
}
|
||||
@@ -1,12 +1,12 @@
|
||||
{
|
||||
# Shutdown all machines on August 3rd at 22:00, so we can protect the
|
||||
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
|
||||
# hardware from spurious electrical peaks on the yearly electrical cut for
|
||||
# maintenance that starts on August 4th.
|
||||
systemd.timers.august-shutdown = {
|
||||
description = "Shutdown on August 3rd for maintenance";
|
||||
description = "Shutdown on August 2nd for maintenance";
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "*-08-03 22:00:00";
|
||||
OnCalendar = "*-08-02 11:00:00";
|
||||
RandomizedDelaySec = "10min";
|
||||
Unit = "systemd-poweroff.service";
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
|
||||
ncdu config.boot.kernelPackages.perf ldns pv
|
||||
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
||||
ncdu config.boot.kernelPackages.perf ldns
|
||||
# From bscpkgs overlay
|
||||
osumb
|
||||
];
|
||||
@@ -21,8 +21,6 @@
|
||||
}
|
||||
];
|
||||
|
||||
environment.enableAllTerminfo = true;
|
||||
|
||||
environment.variables = {
|
||||
EDITOR = "vim";
|
||||
VISUAL = "vim";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{ pkgs, lib, ... }:
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
networking = {
|
||||
@@ -10,14 +10,10 @@
|
||||
allowedTCPPorts = [ 22 ];
|
||||
};
|
||||
|
||||
# Make sure we use iptables
|
||||
nftables.enable = lib.mkForce false;
|
||||
|
||||
hosts = {
|
||||
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
||||
"84.88.51.152" = [ "raccoon" ];
|
||||
"84.88.51.142" = [ "raccoon-ipmi" ];
|
||||
"192.168.11.12" = [ "bscpm04.bsc.es" ];
|
||||
"192.168.11.15" = [ "gitlab-internal.bsc.es" ];
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
{ pkgs, nixpkgs, theFlake, ... }:
|
||||
{ pkgs, nixpkgs, bscpkgs, theFlake, ... }:
|
||||
|
||||
{
|
||||
nixpkgs.overlays = [
|
||||
(import ../../../overlay.nix)
|
||||
bscpkgs.bscOverlay
|
||||
(import ../../../pkgs/overlay.nix)
|
||||
];
|
||||
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
|
||||
nix = {
|
||||
nixPath = [
|
||||
"nixpkgs=${nixpkgs}"
|
||||
@@ -24,7 +23,6 @@
|
||||
trusted-users = [ "@wheel" ];
|
||||
flake-registry = pkgs.writeText "global-registry.json"
|
||||
''{"flakes":[],"version":2}'';
|
||||
keep-outputs = true;
|
||||
};
|
||||
|
||||
gc = {
|
||||
@@ -34,21 +32,6 @@
|
||||
};
|
||||
};
|
||||
|
||||
# The nix-gc.service can begin its execution *before* /home is mounted,
|
||||
# causing it to remove all gcroots considering them as stale, as it cannot
|
||||
# access the symlink. To prevent this problem, we force the service to wait
|
||||
# until /home is mounted as well as other remote FS like /ceph.
|
||||
systemd.services.nix-gc = {
|
||||
# Start remote-fs.target if not already being started and fail if it fails
|
||||
# to start. It will also be stopped if the remote-fs.target fails after
|
||||
# starting successfully.
|
||||
bindsTo = [ "remote-fs.target" ];
|
||||
# Wait until remote-fs.target fully starts before starting this one.
|
||||
after = [ "remote-fs.target"];
|
||||
# Ensure we can access a remote path inside /home
|
||||
unitConfig.ConditionPathExists = "/home/Computational";
|
||||
};
|
||||
|
||||
# This value determines the NixOS release from which the default
|
||||
# settings for stateful data, like file locations and database versions
|
||||
# on your system were taken. It‘s perfectly fine and recommended to leave
|
||||
|
||||
@@ -8,11 +8,15 @@ in
|
||||
# Enable the OpenSSH daemon.
|
||||
services.openssh.enable = true;
|
||||
|
||||
# Connect to intranet git hosts via proxy
|
||||
programs.ssh.extraConfig = ''
|
||||
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
|
||||
User git
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
'';
|
||||
|
||||
programs.ssh.knownHosts = hostsKeys // {
|
||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||
"bscpm04.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT";
|
||||
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
nix.settings.system-features = [ "sys-devices" ];
|
||||
|
||||
programs.nix-required-mounts.enable = true;
|
||||
programs.nix-required-mounts.allowedPatterns.sys-devices.paths = [
|
||||
"/sys/devices/system/cpu"
|
||||
"/sys/devices/system/node"
|
||||
];
|
||||
}
|
||||
@@ -20,7 +20,6 @@
|
||||
rarias = {
|
||||
uid = 1880;
|
||||
isNormalUser = true;
|
||||
linger = true;
|
||||
home = "/home/Computational/rarias";
|
||||
description = "Rodrigo Arias";
|
||||
group = "Computational";
|
||||
@@ -40,7 +39,7 @@
|
||||
home = "/home/Computational/arocanon";
|
||||
description = "Aleix Roca";
|
||||
group = "Computational";
|
||||
extraGroups = [ "wheel" "tracing" ];
|
||||
extraGroups = [ "wheel" ];
|
||||
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
|
||||
@@ -56,7 +55,7 @@
|
||||
home = "/home/Computational/rpenacob";
|
||||
description = "Raúl Peñacoba";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ];
|
||||
hosts = [ "owl1" "owl2" "hut" ];
|
||||
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
||||
@@ -69,10 +68,10 @@
|
||||
home = "/home/Computational/anavarro";
|
||||
description = "Antoni Navarro";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ];
|
||||
hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1";
|
||||
hosts = [ "hut" "raccoon" ];
|
||||
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni"
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
||||
];
|
||||
};
|
||||
|
||||
@@ -82,7 +81,7 @@
|
||||
home = "/home/Computational/abonerib";
|
||||
description = "Aleix Boné";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ];
|
||||
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
|
||||
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
||||
@@ -95,96 +94,16 @@
|
||||
home = "/home/Computational/vlopez";
|
||||
description = "Victor López";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "koro" ];
|
||||
hosts = [ "koro" ];
|
||||
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
||||
];
|
||||
};
|
||||
|
||||
dbautist = {
|
||||
uid = 5649;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dbautist";
|
||||
description = "Dylan Bautista Cases";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "hut" "tent" "raccoon" ];
|
||||
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
||||
];
|
||||
};
|
||||
|
||||
dalvare1 = {
|
||||
uid = 2758;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dalvare1";
|
||||
description = "David Álvarez";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "hut" "tent" "fox" ];
|
||||
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
|
||||
];
|
||||
};
|
||||
|
||||
varcila = {
|
||||
uid = 5650;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/varcila";
|
||||
description = "Vincent Arcila";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "hut" "tent" "fox" ];
|
||||
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
|
||||
];
|
||||
};
|
||||
|
||||
pmartin1 = {
|
||||
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
|
||||
uid = 9652;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/pmartin1";
|
||||
description = "Pedro J. Martinez-Ferrer";
|
||||
group = "Computational";
|
||||
hosts = [ "fox" ];
|
||||
hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl.";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
|
||||
];
|
||||
};
|
||||
|
||||
csiringo = {
|
||||
uid = 9653;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/csiringo";
|
||||
description = "Cesare Siringo";
|
||||
group = "Computational";
|
||||
hosts = [ ];
|
||||
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
|
||||
];
|
||||
};
|
||||
|
||||
acinca = {
|
||||
uid = 9654;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/acinca";
|
||||
description = "Arnau Cinca";
|
||||
group = "Computational";
|
||||
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
|
||||
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
groups = {
|
||||
Computational = { gid = 564; };
|
||||
tracing = { };
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
# Provides the base system for a xeon node in the SSF rack.
|
||||
imports = [
|
||||
./xeon.nix
|
||||
./ssf/fs.nix
|
||||
./ssf/hosts.nix
|
||||
./ssf/hosts-remote.nix
|
||||
./ssf/net.nix
|
||||
];
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
networking.hosts = {
|
||||
# Remote hosts visible from compute nodes
|
||||
"10.106.0.236" = [ "raccoon" ];
|
||||
"10.0.44.4" = [ "tent" ];
|
||||
};
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
networking.hosts = {
|
||||
# Login
|
||||
"10.0.40.30" = [ "apex" ];
|
||||
|
||||
# Storage
|
||||
"10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ];
|
||||
"10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
|
||||
"10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ];
|
||||
|
||||
# Xeon compute
|
||||
"10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ];
|
||||
"10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ];
|
||||
"10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
|
||||
#"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ];
|
||||
"10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ];
|
||||
"10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
|
||||
"10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ];
|
||||
"10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ];
|
||||
};
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
# Infiniband (IPoIB)
|
||||
environment.systemPackages = [ pkgs.rdma-core ];
|
||||
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
||||
|
||||
networking = {
|
||||
defaultGateway = "10.0.40.30";
|
||||
nameservers = ["8.8.8.8"];
|
||||
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Prevent ssfhead from contacting our slurmd daemon
|
||||
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
||||
# But accept traffic to slurm ports from any other node in the subnet
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
||||
# We also need to open the srun port range
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
{
|
||||
# Provides the base system for a xeon node, not necessarily in the SSF rack.
|
||||
# Provides the base system for a xeon node.
|
||||
imports = [
|
||||
./base.nix
|
||||
./xeon/fs.nix
|
||||
./xeon/console.nix
|
||||
./xeon/net.nix
|
||||
];
|
||||
}
|
||||
|
||||
90 m/common/xeon/net.nix (Normal file)
@@ -0,0 +1,90 @@
|
||||
{ pkgs, ... }:
|
||||
|
||||
{
|
||||
# Infiniband (IPoIB)
|
||||
environment.systemPackages = [ pkgs.rdma-core ];
|
||||
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
||||
|
||||
networking = {
|
||||
defaultGateway = "10.0.40.30";
|
||||
nameservers = ["8.8.8.8"];
|
||||
|
||||
proxy = {
|
||||
default = "http://hut:23080/";
|
||||
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
|
||||
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
|
||||
# https://github.com/golang/go/issues/16715
|
||||
allProxy = null;
|
||||
};
|
||||
|
||||
firewall = {
|
||||
extraCommands = ''
|
||||
# Prevent ssfhead from contacting our slurmd daemon
|
||||
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
||||
# But accept traffic to slurm ports from any other node in the subnet
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
||||
# We also need to open the srun port range
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
|
||||
extraHosts = ''
|
||||
10.0.40.30 ssfhead
|
||||
|
||||
# Node Entry for node: mds01 (ID=72)
|
||||
10.0.40.40 bay mds01 mds01-eth0
|
||||
10.0.42.40 bay-ib mds01-ib0
|
||||
10.0.40.141 bay-ipmi mds01-ipmi0
|
||||
|
||||
# Node Entry for node: oss01 (ID=73)
|
||||
10.0.40.41 oss01 oss01-eth0
|
||||
10.0.42.41 oss01-ib0
|
||||
10.0.40.142 oss01-ipmi0
|
||||
|
||||
# Node Entry for node: oss02 (ID=74)
|
||||
10.0.40.42 lake2 oss02 oss02-eth0
|
||||
10.0.42.42 lake2-ib oss02-ib0
|
||||
10.0.40.143 lake2-ipmi oss02-ipmi0
|
||||
|
||||
# Node Entry for node: xeon01 (ID=15)
|
||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
||||
10.0.42.1 owl1-ib xeon01-ib0
|
||||
10.0.40.101 owl1-ipmi xeon01-ipmi0
|
||||
|
||||
# Node Entry for node: xeon02 (ID=16)
|
||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
||||
10.0.42.2 owl2-ib xeon02-ib0
|
||||
10.0.40.102 owl2-ipmi xeon02-ipmi0
|
||||
|
||||
# Node Entry for node: xeon03 (ID=17)
|
||||
10.0.40.3 xeon03 xeon03-eth0
|
||||
10.0.42.3 xeon03-ib0
|
||||
10.0.40.103 xeon03-ipmi0
|
||||
|
||||
# Node Entry for node: xeon04 (ID=18)
|
||||
10.0.40.4 xeon04 xeon04-eth0
|
||||
10.0.42.4 xeon04-ib0
|
||||
10.0.40.104 xeon04-ipmi0
|
||||
|
||||
# Node Entry for node: xeon05 (ID=19)
|
||||
10.0.40.5 koro xeon05 xeon05-eth0
|
||||
10.0.42.5 koro-ib xeon05-ib0
|
||||
10.0.40.105 koro-ipmi xeon05-ipmi0
|
||||
|
||||
# Node Entry for node: xeon06 (ID=20)
|
||||
10.0.40.6 xeon06 xeon06-eth0
|
||||
10.0.42.6 xeon06-ib0
|
||||
10.0.40.106 xeon06-ipmi0
|
||||
|
||||
# Node Entry for node: xeon07 (ID=21)
|
||||
10.0.40.7 hut xeon07 xeon07-eth0
|
||||
10.0.42.7 hut-ib xeon07-ib0
|
||||
10.0.40.107 hut-ipmi xeon07-ipmi0
|
||||
|
||||
# Node Entry for node: xeon08 (ID=22)
|
||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
||||
10.0.42.8 eudy-ib xeon08-ib0
|
||||
10.0.40.108 eudy-ipmi xeon08-ipmi0
|
||||
'';
|
||||
};
|
||||
}
|
||||
@@ -2,14 +2,13 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||
|
||||
./kernel/kernel.nix
|
||||
./cpufreq.nix
|
||||
./fs.nix
|
||||
./users.nix
|
||||
../module/hut-substituter.nix
|
||||
../module/debuginfod.nix
|
||||
];
|
||||
|
||||
|
||||
10326 m/eudy/kernel/configs/defconfig (Normal file; file diff suppressed because it is too large)
10333 m/eudy/kernel/configs/lockdep (Normal file; file diff suppressed because it is too large)
@@ -1,112 +0,0 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/base.nix
|
||||
../common/xeon/console.nix
|
||||
../module/amd-uprof.nix
|
||||
../module/emulation.nix
|
||||
../module/nvidia.nix
|
||||
../module/slurm-client.nix
|
||||
../module/hut-substituter.nix
|
||||
./wireguard.nix
|
||||
];
|
||||
|
||||
# Don't turn off on August as UPC has different dates.
|
||||
# Fox works fine on power cuts.
|
||||
systemd.timers.august-shutdown.enable = false;
|
||||
|
||||
# Select this disk using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
|
||||
|
||||
# No swap, there is plenty of RAM
|
||||
swapDevices = lib.mkForce [];
|
||||
|
||||
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
|
||||
boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ];
|
||||
|
||||
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
|
||||
|
||||
# Use performance for benchmarks
|
||||
powerManagement.cpuFreqGovernor = "performance";
|
||||
|
||||
services.amd-uprof.enable = true;
|
||||
|
||||
# Disable NUMA balancing
|
||||
boot.kernel.sysctl."kernel.numa_balancing" = 0;
|
||||
|
||||
# Expose kernel addresses
|
||||
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
|
||||
|
||||
# Disable NMI watchdog to save one hw counter (for AMD uProf)
|
||||
boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
|
||||
|
||||
services.openssh.settings.X11Forwarding = true;
|
||||
|
||||
services.fail2ban.enable = true;
|
||||
|
||||
networking = {
|
||||
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
|
||||
hostName = "fox";
|
||||
# UPC network (may change over time, use DHCP)
|
||||
# Public IP configuration:
|
||||
# - Hostname: fox.ac.upc.edu
|
||||
# - IP: 147.83.30.141
|
||||
# - Gateway: 147.83.30.130
|
||||
# - NetMask: 255.255.255.192
|
||||
# Private IP configuration for BMC:
|
||||
# - Hostname: fox-ipmi.ac.upc.edu
|
||||
# - IP: 147.83.35.27
|
||||
# - Gateway: 147.83.35.2
|
||||
# - NetMask: 255.255.255.0
|
||||
interfaces.enp1s0f0np0.useDHCP = true;
|
||||
};
|
||||
|
||||
# Recommended for new graphics cards
|
||||
hardware.nvidia.open = true;
|
||||
|
||||
# Mount NVME disks
|
||||
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
|
||||
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
|
||||
|
||||
# Mount the NFS home
|
||||
fileSystems."/nfs/home" = {
|
||||
device = "10.106.0.30:/home";
|
||||
fsType = "nfs";
|
||||
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||
};
|
||||
|
||||
# Make a /nvme{0,1}/$USER directory for each user.
|
||||
systemd.services.create-nvme-dirs = let
|
||||
# Take only normal users in fox
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "local-fs.target" ];
|
||||
after = [ "local-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
|
||||
# Only allow SSH connections from users who have a SLURM allocation
|
||||
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
security.pam.services.sshd.rules.account.slurm = {
|
||||
control = "required";
|
||||
enable = true;
|
||||
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
||||
args = [ "log_level=debug5" ];
|
||||
order = 999999; # Make it last one
|
||||
};
|
||||
|
||||
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
||||
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
||||
# into the slurmstepd task and then into the systemd session, which is not
|
||||
# what we want, otherwise it will linger even if all jobs are gone.
|
||||
security.pam.services.sshd.startSession = lib.mkForce false;
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
{ config, ... }:
|
||||
|
||||
{
|
||||
networking.firewall = {
|
||||
allowedUDPPorts = [ 666 ];
|
||||
};
|
||||
|
||||
age.secrets.wgFox.file = ../../secrets/wg-fox.age;
|
||||
|
||||
networking.wireguard.enable = true;
|
||||
networking.wireguard.interfaces = {
|
||||
# "wg0" is the network interface name. You can name the interface arbitrarily.
|
||||
wg0 = {
|
||||
# Determines the IP address and subnet of the server's end of the tunnel interface.
|
||||
ips = [ "10.106.0.1/24" ];
|
||||
|
||||
# The port that WireGuard listens to. Must be accessible by the client.
|
||||
listenPort = 666;
|
||||
|
||||
# Path to the private key file.
|
||||
privateKeyFile = config.age.secrets.wgFox.path;
|
||||
# Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
|
||||
|
||||
peers = [
|
||||
# List of allowed peers.
|
||||
{
|
||||
name = "apex";
|
||||
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
|
||||
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
|
||||
allowedIPs = [ "10.106.0.30/32" "10.0.40.7/32" ];
|
||||
}
|
||||
{
|
||||
name = "raccoon";
|
||||
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
networking.hosts = {
|
||||
"10.106.0.30" = [ "apex" ];
|
||||
"10.0.40.7" = [ "hut" ];
|
||||
"10.106.0.236" = [ "raccoon" ];
|
||||
"10.0.44.4" = [ "tent" ];
|
||||
};
|
||||
|
||||
networking.firewall = {
|
||||
extraCommands = ''
|
||||
# Accept slurm connections to slurmd from apex (via wireguard)
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
}
|
||||
@@ -3,12 +3,160 @@ modules:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:23080"
|
||||
skip_resolve_phase_with_proxy: true
|
||||
follow_redirects: true
|
||||
preferred_ip_protocol: "ip4"
|
||||
valid_status_codes: [] # Defaults to 2xx
|
||||
method: GET
|
||||
http_with_proxy:
|
||||
prober: http
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:3128"
|
||||
skip_resolve_phase_with_proxy: true
|
||||
http_with_proxy_and_headers:
|
||||
prober: http
|
||||
http:
|
||||
proxy_url: "http://127.0.0.1:3128"
|
||||
proxy_connect_header:
|
||||
Proxy-Authorization:
|
||||
- Bearer token
|
||||
http_post_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
headers:
|
||||
Content-Type: application/json
|
||||
body: '{}'
|
||||
http_post_body_file:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
body_file: "/files/body.txt"
|
||||
http_basic_auth_example:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
headers:
|
||||
Host: "login.example.com"
|
||||
basic_auth:
|
||||
username: "username"
|
||||
password: "mysecret"
|
||||
http_2xx_oauth_client_credentials:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
||||
follow_redirects: true
|
||||
preferred_ip_protocol: "ip4"
|
||||
valid_status_codes:
|
||||
- 200
|
||||
- 201
|
||||
oauth2:
|
||||
client_id: "client_id"
|
||||
client_secret: "client_secret"
|
||||
token_url: "https://api.example.com/token"
|
||||
endpoint_params:
|
||||
grant_type: "client_credentials"
|
||||
http_custom_ca_example:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
tls_config:
|
||||
ca_file: "/certs/my_cert.crt"
|
||||
http_gzip:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
compression: gzip
|
||||
http_gzip_with_accept_encoding:
|
||||
prober: http
|
||||
http:
|
||||
method: GET
|
||||
compression: gzip
|
||||
headers:
|
||||
Accept-Encoding: gzip
|
||||
tls_connect:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
tls: true
|
||||
tcp_connect_example:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
imap_starttls:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- expect: "OK.*STARTTLS"
|
||||
- send: ". STARTTLS"
|
||||
- expect: "OK"
|
||||
- starttls: true
|
||||
- send: ". capability"
|
||||
- expect: "CAPABILITY IMAP4rev1"
|
||||
smtp_starttls:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- expect: "^220 ([^ ]+) ESMTP (.+)$"
|
||||
- send: "EHLO prober\r"
|
||||
- expect: "^250-STARTTLS"
|
||||
- send: "STARTTLS\r"
|
||||
- expect: "^220"
|
||||
- starttls: true
|
||||
- send: "EHLO prober\r"
|
||||
- expect: "^250-AUTH"
|
||||
- send: "QUIT\r"
|
||||
irc_banner_example:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
query_response:
|
||||
- send: "NICK prober"
|
||||
- send: "USER prober prober prober :prober"
|
||||
- expect: "PING :([^ ]+)"
|
||||
send: "PONG ${1}"
|
||||
- expect: "^:[^ ]+ 001"
|
||||
icmp:
|
||||
prober: icmp
|
||||
timeout: 5s
|
||||
icmp:
|
||||
preferred_ip_protocol: "ip4"
|
||||
dns_udp_example:
|
||||
prober: dns
|
||||
timeout: 5s
|
||||
dns:
|
||||
query_name: "www.prometheus.io"
|
||||
query_type: "A"
|
||||
valid_rcodes:
|
||||
- NOERROR
|
||||
validate_answer_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
fail_if_all_match_regexp:
|
||||
- ".*127.0.0.1"
|
||||
fail_if_not_matches_regexp:
|
||||
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
|
||||
fail_if_none_matches_regexp:
|
||||
- "127.0.0.1"
|
||||
validate_authority_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
validate_additional_rrs:
|
||||
fail_if_matches_regexp:
|
||||
- ".*127.0.0.1"
|
||||
dns_soa:
|
||||
prober: dns
|
||||
dns:
|
||||
query_name: "prometheus.io"
|
||||
query_type: "SOA"
|
||||
dns_tcp_example:
|
||||
prober: dns
|
||||
dns:
|
||||
transport_protocol: "tcp" # defaults to "udp"
|
||||
preferred_ip_protocol: "ip4" # defaults to "ip6"
|
||||
query_name: "www.prometheus.io"
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
|
||||
../module/ceph.nix
|
||||
../module/debuginfod.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
./gitlab-runner.nix
|
||||
./monitoring.nix
|
||||
./nfs.nix
|
||||
./slurm-server.nix
|
||||
./nix-serve.nix
|
||||
./public-inbox.nix
|
||||
./gitea.nix
|
||||
@@ -21,21 +23,11 @@
|
||||
];
|
||||
|
||||
# Select this disk using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f";
|
||||
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
|
||||
|
||||
fileSystems = {
|
||||
"/" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nvme";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
options = [ "noatime" ];
|
||||
};
|
||||
|
||||
"/boot" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nixos-boot";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
};
|
||||
fileSystems."/nvme" = {
|
||||
fsType = "ext4";
|
||||
device = "/dev/disk/by-label/nvme";
|
||||
};
|
||||
|
||||
networking = {
|
||||
@@ -54,11 +46,6 @@
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
|
||||
'';
|
||||
# Flush all rules and chains on stop so it won't break on start
|
||||
extraStopCommands = ''
|
||||
iptables -F
|
||||
iptables -X
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
|
||||
{
|
||||
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age;
|
||||
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age;
|
||||
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
|
||||
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
|
||||
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
|
||||
|
||||
services.gitlab-runner = {
|
||||
enable = true;
|
||||
@@ -22,90 +21,21 @@
|
||||
"--docker-network-mode host"
|
||||
];
|
||||
environmentVariables = {
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
https_proxy = "http://localhost:23080";
|
||||
http_proxy = "http://localhost:23080";
|
||||
};
|
||||
};
|
||||
in {
|
||||
# For pm.bsc.es/gitlab
|
||||
gitlab-pm-shell = common-shell // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path;
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
|
||||
};
|
||||
gitlab-pm-docker = common-docker // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
|
||||
};
|
||||
|
||||
gitlab-bsc-docker = {
|
||||
# gitlab.bsc.es still uses the old token mechanism
|
||||
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
|
||||
tagList = [ "docker" "hut" ];
|
||||
environmentVariables = {
|
||||
# We cannot access the hut local interface from docker, so we connect
|
||||
# to hut directly via the ethernet one.
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
};
|
||||
executor = "docker";
|
||||
dockerImage = "alpine";
|
||||
dockerVolumes = [
|
||||
"/nix/store:/nix/store:ro"
|
||||
"/nix/var/nix/db:/nix/var/nix/db:ro"
|
||||
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
|
||||
];
|
||||
dockerExtraHosts = [
|
||||
# Required to pass the proxy via hut
|
||||
"hut:10.0.40.7"
|
||||
];
|
||||
dockerDisableCache = true;
|
||||
registrationFlags = [
|
||||
# Increase build log length to 64 MiB
|
||||
"--output-limit 65536"
|
||||
];
|
||||
preBuildScript = pkgs.writeScript "setup-container" ''
|
||||
mkdir -p -m 0755 /nix/var/log/nix/drvs
|
||||
mkdir -p -m 0755 /nix/var/nix/gcroots
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles
|
||||
mkdir -p -m 0755 /nix/var/nix/temproots
|
||||
mkdir -p -m 0755 /nix/var/nix/userpool
|
||||
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
|
||||
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
|
||||
mkdir -p -m 0700 "$HOME/.nix-defexpr"
|
||||
mkdir -p -m 0700 "$HOME/.ssh"
|
||||
cat > "$HOME/.ssh/config" << EOF
|
||||
Host bscpm04.bsc.es gitlab-internal.bsc.es
|
||||
User git
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
EOF
|
||||
cat >> "$HOME/.ssh/known_hosts" << EOF
|
||||
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
|
||||
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
|
||||
EOF
|
||||
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
|
||||
# Required to load SSL certificate paths
|
||||
. ${pkgs.cacert}/nix-support/setup-hook
|
||||
'';
|
||||
environmentVariables = {
|
||||
ENV = "/etc/profile";
|
||||
USER = "root";
|
||||
NIX_REMOTE = "daemon";
|
||||
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
|
||||
};
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# DOCKER* chains are useless, override at FORWARD and nixos-fw
|
||||
networking.firewall.extraCommands = ''
|
||||
# Don't forward any traffic from docker
|
||||
iptables -I FORWARD 1 -p all -i docker0 -j nixos-fw-log-refuse
|
||||
|
||||
# Allow incoming traffic from docker to 23080
|
||||
iptables -A nixos-fw -p tcp -i docker0 -d hut --dport 23080 -j ACCEPT
|
||||
'';
|
||||
|
||||
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
|
||||
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
|
||||
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";
|
||||
|
||||
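The runner jobs above only reach the outside world through the CONNECT proxy on hut (port 23080). A hedged manual check, with the hostnames exactly as configured above and OpenBSD netcat as used by the ProxyCommand:

    # verify the proxy on hut answers before blaming the runner
    https_proxy=http://hut:23080 curl -sI https://gitlab.bsc.es/ | head -n 1
    # the SSH ProxyCommand tunnels through the same proxy
    nc -X connect -x hut:23080 bscpm04.bsc.es 22 < /dev/null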
@@ -1,31 +0,0 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
let
|
||||
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
|
||||
''
|
||||
cp ${./gpfs-probe.sh} $out;
|
||||
chmod +x $out
|
||||
''
|
||||
;
|
||||
in
|
||||
{
|
||||
# Use a new user to handle the SSH keys
|
||||
users.groups.ssh-robot = { };
|
||||
users.users.ssh-robot = {
|
||||
description = "SSH Robot";
|
||||
isNormalUser = true;
|
||||
home = "/var/lib/ssh-robot";
|
||||
};
|
||||
|
||||
systemd.services.gpfs-probe = {
|
||||
description = "Daemon to report GPFS latency via SSH";
|
||||
path = [ pkgs.openssh pkgs.netcat ];
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "default.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
|
||||
User = "ssh-robot";
|
||||
Group = "ssh-robot";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
N=500
|
||||
|
||||
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
|
||||
|
||||
if [ -z "$t" ]; then
|
||||
t="5.00"
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||
|
||||
# HELP gpfs_touch_latency Time to create $N files.
|
||||
# TYPE gpfs_touch_latency gauge
|
||||
gpfs_touch_latency $t
|
||||
EOF
|
||||
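Before its removal, the probe above answered plain HTTP on port 9966 through socat; a one-line check (assuming it is still running on hut) would have been:

    # the probe returns Prometheus text format with a single gauge
    curl -s http://127.0.0.1:9966/ | grep gpfs_touch_latency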
13
m/hut/ipmi.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
modules:
|
||||
default:
|
||||
collectors:
|
||||
- bmc
|
||||
- ipmi
|
||||
- chassis
|
||||
|
||||
lan:
|
||||
collectors:
|
||||
- ipmi
|
||||
- chassis
|
||||
user: ""
|
||||
pass: ""
|
||||
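The lan module above is intended for scraping remote BMCs. A minimal sketch of querying the exporter by hand, assuming its default listen port 9290 and the /ipmi remote-scrape path, with one of the BMC addresses listed later in this change:

    # probe one BMC through the IPMI exporter using the lan module
    curl -s 'http://127.0.0.1:9290/ipmi?module=lan&target=10.0.40.101' | head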
@@ -1,13 +1,7 @@
|
||||
{ config, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../module/slurm-exporter.nix
|
||||
../module/meteocat-exporter.nix
|
||||
../module/upc-qaire-exporter.nix
|
||||
./gpfs-probe.nix
|
||||
../module/nix-daemon-exporter.nix
|
||||
];
|
||||
imports = [ ../module/slurm-exporter.nix ];
|
||||
|
||||
age.secrets.grafanaJungleRobotPassword = {
|
||||
file = ../../secrets/jungle-robot-password.age;
|
||||
@@ -15,8 +9,6 @@
|
||||
mode = "400";
|
||||
};
|
||||
|
||||
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings = {
|
||||
@@ -49,7 +41,7 @@
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
port = 9001;
|
||||
retentionTime = "5y";
|
||||
retentionTime = "1y";
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
|
||||
@@ -78,13 +70,13 @@
|
||||
enable = true;
|
||||
group = "root";
|
||||
user = "root";
|
||||
configFile = config.age.secrets.ipmiYml.path;
|
||||
# extraFlags = [ "--log.level=debug" ];
|
||||
configFile = ./ipmi.yml;
|
||||
#extraFlags = [ "--log.level=debug" ];
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" "logind" ];
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
@@ -110,10 +102,6 @@
|
||||
"127.0.0.1:9252"
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
||||
"127.0.0.1:9341" # Slurm exporter
|
||||
"127.0.0.1:9966" # GPFS custom exporter
|
||||
"127.0.0.1:9999" # Nix-daemon custom exporter
|
||||
"127.0.0.1:9929" # Meteocat custom exporter
|
||||
"127.0.0.1:9928" # UPC Qaire custom exporter
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
||||
];
|
||||
}];
|
||||
@@ -169,9 +157,6 @@
|
||||
"8.8.8.8"
|
||||
"ssfhead"
|
||||
"anella-bsc.cesca.cat"
|
||||
"upc-anella.cesca.cat"
|
||||
"fox.ac.upc.edu"
|
||||
"arenys5.ac.upc.edu"
|
||||
];
|
||||
}];
|
||||
relabel_configs = [
|
||||
@@ -217,7 +202,7 @@
|
||||
# Sets the "instance" label with the remote host we are querying
|
||||
source_labels = [ "__param_target" ];
|
||||
separator = ";";
|
||||
regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end
|
||||
regex = "(.*)";
|
||||
target_label = "instance";
|
||||
replacement = "\${1}";
|
||||
action = "replace";
|
||||
@@ -259,14 +244,6 @@
|
||||
module = [ "raccoon" ];
|
||||
};
|
||||
}
|
||||
{
|
||||
job_name = "raccoon";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "127.0.0.1:19002" ]; # Node exporter
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
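After a retention or scrape-target change like the one above, it is worth confirming that Prometheus reloaded cleanly. A sketch assuming the listen address configured here (127.0.0.1:9001):

    # count scrape targets by health as seen by Prometheus
    curl -s http://127.0.0.1:9001/api/v1/targets | grep -o '"health":"[a-z]*"' | sort | uniq -c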
@@ -1,76 +1,14 @@
|
||||
{ theFlake, pkgs, ... }:
|
||||
let
|
||||
website = pkgs.stdenv.mkDerivation {
|
||||
name = "jungle-web";
|
||||
src = pkgs.fetchgit {
|
||||
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
|
||||
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
|
||||
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
|
||||
};
|
||||
buildInputs = [ pkgs.hugo ];
|
||||
buildPhase = ''
|
||||
rm -rf public/
|
||||
hugo
|
||||
'';
|
||||
installPhase = ''
|
||||
cp -r public $out
|
||||
'';
|
||||
# Don't mess doc/
|
||||
dontFixup = true;
|
||||
};
|
||||
in
|
||||
{
|
||||
networking.firewall.allowedTCPPorts = [ 80 ];
|
||||
services.nginx = {
|
||||
enable = true;
|
||||
virtualHosts."jungle.bsc.es" = {
|
||||
root = "${website}";
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 80;
|
||||
addr = "127.0.0.1";
|
||||
port = 8123;
|
||||
}
|
||||
];
|
||||
extraConfig = ''
|
||||
set_real_ip_from 127.0.0.1;
|
||||
set_real_ip_from 84.88.52.107;
|
||||
real_ip_recursive on;
|
||||
real_ip_header X-Forwarded-For;
|
||||
|
||||
location /git {
|
||||
rewrite ^/git$ / break;
|
||||
rewrite ^/git/(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:3000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /cache {
|
||||
rewrite ^/cache/(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:5000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /lists {
|
||||
proxy_pass http://127.0.0.1:8081;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /grafana {
|
||||
proxy_pass http://127.0.0.1:2342;
|
||||
proxy_redirect http:// $scheme://;
|
||||
proxy_set_header Host $host;
|
||||
# Websockets
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
location ~ ^/~(.+?)(/.*)?$ {
|
||||
alias /ceph/home/$1/public_html$2;
|
||||
index index.html index.htm;
|
||||
autoindex on;
|
||||
absolute_redirect off;
|
||||
}
|
||||
location /p/ {
|
||||
alias /ceph/p/;
|
||||
}
|
||||
'';
|
||||
locations."/p/".alias = "/ceph/p/";
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
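With the listen address moved to 127.0.0.1:8123 behind a fronting proxy, the vhost can still be exercised locally. A hedged check, using the Host header and port configured above:

    # hit the local nginx directly while pretending to be the public name
    curl -sI -H 'Host: jungle.bsc.es' http://127.0.0.1:8123/git/ | head -n 1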
29
m/hut/p.nix
@@ -1,4 +1,4 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
{ pkgs, ... }:
|
||||
let
|
||||
p = pkgs.writeShellScriptBin "p" ''
|
||||
set -e
|
||||
@@ -6,14 +6,12 @@ let
|
||||
pastedir="p/$USER"
|
||||
mkdir -p "$pastedir"
|
||||
|
||||
ext="txt"
|
||||
|
||||
if [ -n "$1" ]; then
|
||||
ext="$1"
|
||||
out="$pastedir/$1"
|
||||
else
|
||||
out=$(mktemp "$pastedir/XXXXXXXX.txt")
|
||||
fi
|
||||
|
||||
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
|
||||
|
||||
cat > "$out"
|
||||
chmod go+r "$out"
|
||||
echo "https://jungle.bsc.es/$out"
|
||||
@@ -21,23 +19,4 @@ let
|
||||
in
|
||||
{
|
||||
environment.systemPackages = with pkgs; [ p ];
|
||||
|
||||
# Make sure we have a directory per user. We cannot use the nice
|
||||
# systemd-tmpfiles-setup.service unit because this is a remote FS, and it
|
||||
# may not be mounted when it runs.
|
||||
systemd.services.create-paste-dirs = let
|
||||
# Take only normal users in hut
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "remote-fs.target" ];
|
||||
after = [ "remote-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
}
|
||||
|
||||
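The p helper above reads stdin, stores it under the per-user paste directory and prints a shareable URL; purely illustrative usage (file names are hypothetical):

    # paste a text snippet and get back a link
    echo "hello jungle" | p
    # keep a meaningful extension so the file is served with a useful type
    p svg < diagram.svg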
7
m/hut/slurm-server.nix
Normal file
@@ -0,0 +1,7 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
services.slurm = {
|
||||
server.enable = true;
|
||||
};
|
||||
}
|
||||
@@ -1,15 +1,15 @@
|
||||
- targets:
|
||||
- owl1-ipmi
|
||||
- owl2-ipmi
|
||||
- xeon03-ipmi
|
||||
- xeon04-ipmi
|
||||
- koro-ipmi
|
||||
- weasel-ipmi
|
||||
- hut-ipmi
|
||||
- eudy-ipmi
|
||||
- 10.0.40.101
|
||||
- 10.0.40.102
|
||||
- 10.0.40.103
|
||||
- 10.0.40.104
|
||||
- 10.0.40.105
|
||||
- 10.0.40.106
|
||||
- 10.0.40.107
|
||||
- 10.0.40.108
|
||||
# Storage
|
||||
- bay-ipmi
|
||||
- oss01-ipmi
|
||||
- lake2-ipmi
|
||||
- 10.0.40.141
|
||||
- 10.0.40.142
|
||||
- 10.0.40.143
|
||||
labels:
|
||||
job: ipmi-lan
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||
|
||||
../eudy/cpufreq.nix
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
../module/monitoring.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
||||
|
||||
70
m/map.nix
@@ -1,70 +0,0 @@
|
||||
{
|
||||
# In physical order from top to bottom (see note below)
|
||||
ssf = {
|
||||
# Switches for Ethernet and OmniPath
|
||||
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
|
||||
switch-opa = { pos=41; size=1; };
|
||||
|
||||
# SSF login
|
||||
apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
|
||||
|
||||
# Storage
|
||||
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
|
||||
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
|
||||
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
|
||||
|
||||
# Compute xeon
|
||||
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
|
||||
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
|
||||
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
|
||||
# Slot 34 empty
|
||||
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
|
||||
weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
|
||||
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
|
||||
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
|
||||
|
||||
# 16 KNL nodes, 4 per chassis
|
||||
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
|
||||
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
|
||||
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
|
||||
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
|
||||
|
||||
# Slot 19 empty
|
||||
|
||||
# EPI (hw team, guessed order)
|
||||
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
|
||||
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
|
||||
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
|
||||
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
|
||||
|
||||
# These are old and decommissioned (off)
|
||||
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
|
||||
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
|
||||
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
|
||||
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
|
||||
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
|
||||
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
|
||||
};
|
||||
|
||||
bsc2218 = {
|
||||
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
|
||||
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
|
||||
};
|
||||
|
||||
upc = {
|
||||
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
|
||||
};
|
||||
|
||||
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
|
||||
# bottom. Example:
|
||||
#
|
||||
# | ... | - [pos+size] <--- Label in chassis
|
||||
# +--------+
|
||||
# | node | - [pos+1]
|
||||
# | 2U | - [pos]
|
||||
# +------- +
|
||||
# | ... | - [pos-1]
|
||||
#
|
||||
# NOTE: The board and sn refers to the FRU information (Board Product and
|
||||
# Board Serial) via `ipmitool fru print 0`.
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
{
|
||||
options = {
|
||||
services.amd-uprof = {
|
||||
enable = lib.mkOption {
|
||||
type = lib.types.bool;
|
||||
default = false;
|
||||
description = "Whether to enable AMD uProf.";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Only set up amd-uprof if enabled
|
||||
config = lib.mkIf config.services.amd-uprof.enable {
|
||||
|
||||
# First make sure that we add the module to the list of available modules
|
||||
# in the kernel matching the same kernel version of this configuration.
|
||||
boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ];
|
||||
boot.kernelModules = [ "AMDPowerProfiler" ];
|
||||
|
||||
# Make the userspace tools available in $PATH.
|
||||
environment.systemPackages = with pkgs; [ amd-uprof ];
|
||||
|
||||
# The AMDPowerProfiler module doesn't create the /dev device nor does it emit
|
||||
# any uevents, so we cannot use udev rules to automatically create the
|
||||
# device. Instead, we run a systemd unit that does it after loading the
|
||||
# modules.
|
||||
systemd.services.amd-uprof-device = {
|
||||
description = "Create /dev/AMDPowerProfiler device";
|
||||
after = [ "systemd-modules-load.service" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
unitConfig.ConditionPathExists = [
|
||||
"/proc/AMDPowerProfiler/device"
|
||||
"!/dev/AMDPowerProfiler"
|
||||
];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
|
||||
mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
|
||||
'';
|
||||
ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
|
||||
rm -f /dev/AMDPowerProfiler
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
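The oneshot unit above builds the character device from the major number that the kernel module publishes. A quick manual verification on a node with the module loaded, using the same paths as the unit:

    # major number exported by the AMDPowerProfiler module
    cat /proc/AMDPowerProfiler/device
    # device node the unit should have created from it
    ls -l /dev/AMDPowerProfiler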
@@ -13,7 +13,7 @@
|
||||
|
||||
age.secrets.cephUser.file = ../../secrets/ceph-user.age;
|
||||
|
||||
fileSystems."/ceph" = {
|
||||
fileSystems."/ceph-slow" = {
|
||||
fsType = "ceph";
|
||||
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
||||
options = [
|
||||
@@ -21,4 +21,16 @@
|
||||
"secretfile=${config.age.secrets.cephUser.path}"
|
||||
];
|
||||
};
|
||||
|
||||
services.cachefilesd.enable = true;
|
||||
|
||||
fileSystems."/ceph" = {
|
||||
fsType = "ceph";
|
||||
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
||||
options = [
|
||||
"fsc"
|
||||
"mon_addr=10.0.40.40"
|
||||
"secretfile=${config.age.secrets.cephUser.path}"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
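To confirm that the new /ceph mount actually picked up the fscache option while /ceph-slow did not, something like the following can be run on the client (a check, not part of the configuration):

    # list ceph mounts and their options; /ceph should include "fsc"
    findmnt -t ceph -o TARGET,OPTIONS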
@@ -1,13 +0,0 @@
|
||||
{ config, ... }:
|
||||
{
|
||||
nix.settings =
|
||||
# Don't add hut as a cache to itself
|
||||
assert config.networking.hostName != "hut";
|
||||
{
|
||||
extra-substituters = [ "http://hut/cache" ];
|
||||
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||
|
||||
# Set a low timeout in case hut is down
|
||||
connect-timeout = 3; # seconds
|
||||
};
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
systemd.services."prometheus-meteocat-exporter" = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Restart = mkDefault "always";
|
||||
PrivateTmp = mkDefault true;
|
||||
WorkingDirectory = mkDefault "/tmp";
|
||||
DynamicUser = mkDefault true;
|
||||
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Locate nix daemon pid
|
||||
nd=$(pgrep -o nix-daemon)
|
||||
|
||||
# Locate children of nix-daemon
|
||||
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
|
||||
|
||||
# For each children, locate 2nd level children
|
||||
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
|
||||
|
||||
cat <<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||
|
||||
# HELP nix_daemon_build Nix daemon derivation build state.
|
||||
# TYPE nix_daemon_build gauge
|
||||
EOF
|
||||
|
||||
for pid in $pids2; do
|
||||
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
|
||||
user=$(ps -o uname= -p "$pid")
|
||||
if [ -n "$name" -a -n "$user" ]; then
|
||||
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
|
||||
fi
|
||||
done
|
||||
@@ -1,23 +0,0 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
let
|
||||
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
|
||||
''
|
||||
cp ${./nix-daemon-builds.sh} $out;
|
||||
chmod +x $out
|
||||
''
|
||||
;
|
||||
in
|
||||
{
|
||||
systemd.services.nix-daemon-exporter = {
|
||||
description = "Daemon to export nix-daemon metrics";
|
||||
path = [ pkgs.procps pkgs.ripgrep ];
|
||||
wantedBy = [ "default.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
|
||||
# Needed root to read the environment, potentially unsafe
|
||||
User = "root";
|
||||
Group = "root";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
{
|
||||
# Configure Nvidia driver to use with CUDA
|
||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||
hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
|
||||
hardware.graphics.enable = true;
|
||||
nixpkgs.config.nvidia.acceptLicense = true;
|
||||
services.xserver.videoDrivers = [ "nvidia" ];
|
||||
|
||||
# enable support for derivations which require nvidia-gpu to be available
|
||||
# > requiredSystemFeatures = [ "cuda" ];
|
||||
programs.nix-required-mounts.enable = true;
|
||||
programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
|
||||
# They forgot to add the symlink
|
||||
programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
|
||||
config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
|
||||
];
|
||||
|
||||
environment.systemPackages = [ pkgs.cudainfo ];
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
let
|
||||
cfg = config.services.p;
|
||||
in
|
||||
{
|
||||
options = {
|
||||
services.p = {
|
||||
enable = lib.mkOption {
|
||||
type = lib.types.bool;
|
||||
default = false;
|
||||
description = "Whether to enable the p service.";
|
||||
};
|
||||
path = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "/var/lib/p";
|
||||
description = "Where to save the pasted files on disk.";
|
||||
};
|
||||
url = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "https://jungle.bsc.es/p";
|
||||
description = "URL prefix for the printed file.";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = lib.mkIf cfg.enable {
|
||||
environment.systemPackages = let
|
||||
p = pkgs.writeShellScriptBin "p" ''
|
||||
set -e
|
||||
pastedir="${cfg.path}/$USER"
|
||||
cd "$pastedir"
|
||||
|
||||
ext="txt"
|
||||
if [ -n "$1" ]; then
|
||||
ext="$1"
|
||||
fi
|
||||
|
||||
out=$(mktemp "XXXXXXXX.$ext")
|
||||
cat > "$out"
|
||||
chmod go+r "$out"
|
||||
echo "${cfg.url}/$USER/$out"
|
||||
'';
|
||||
in [ p ];
|
||||
|
||||
systemd.services.p = let
|
||||
# Take only normal users
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
# Create a directory for each user
|
||||
commands = lib.concatLists (lib.mapAttrsToList (_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}"
|
||||
]) users);
|
||||
in {
|
||||
description = "P service setup";
|
||||
requires = [ "network-online.target" ];
|
||||
#wants = [ "remote-fs.target" ];
|
||||
#after = [ "remote-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
ExecStart = pkgs.writeShellScript "p-init.sh" (''
|
||||
|
||||
install -d -o root -g root -m 0755 ${cfg.path}
|
||||
|
||||
'' + (lib.concatLines commands));
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.power.policy;
|
||||
in
|
||||
{
|
||||
options = {
|
||||
power.policy = mkOption {
|
||||
type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
|
||||
default = null;
|
||||
description = "Set power policy to use via IPMI.";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf (cfg != null) {
|
||||
systemd.services."power-policy" = {
|
||||
description = "Set power policy to use via IPMI";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
unitConfig = {
|
||||
StartLimitBurst = "10";
|
||||
StartLimitIntervalSec = "10m";
|
||||
};
|
||||
serviceConfig = {
|
||||
ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
|
||||
Type = "oneshot";
|
||||
Restart = "on-failure";
|
||||
RestartSec = "5s";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,10 +1,33 @@
|
||||
{ lib, ... }:
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
./slurm-common.nix
|
||||
];
|
||||
let
|
||||
suspendProgram = pkgs.writeScript "suspend.sh" ''
|
||||
#!/usr/bin/env bash
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Shutting down host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||
done
|
||||
'';
|
||||
|
||||
resumeProgram = pkgs.writeScript "resume.sh" ''
|
||||
#!/usr/bin/env bash
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Starting host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||
done
|
||||
'';
|
||||
|
||||
in {
|
||||
systemd.services.slurmd.serviceConfig = {
|
||||
# Kill all processes in the control group on stop/restart. This will kill
|
||||
# all the jobs running, so ensure that we only upgrade when the nodes are
|
||||
@@ -12,13 +35,73 @@
|
||||
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||
KillMode = lib.mkForce "control-group";
|
||||
|
||||
# If slurmd fails to contact the control server it will fail, causing the
|
||||
# node to remain out of service until manually restarted. Always try to
|
||||
# restart it.
|
||||
Restart = "always";
|
||||
RestartSec = "30s";
|
||||
};
|
||||
|
||||
services.slurm.client.enable = true;
|
||||
services.slurm = {
|
||||
client.enable = true;
|
||||
controlMachine = "hut";
|
||||
clusterName = "jungle";
|
||||
nodeName = [
|
||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
||||
];
|
||||
|
||||
partitionName = [
|
||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
];
|
||||
|
||||
# See slurm.conf(5) for more details about these options.
|
||||
extraConfig = ''
|
||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
||||
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
||||
# library in SLURM (--mpi=pmix). See more details here:
|
||||
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
||||
MpiDefault=pmix
|
||||
|
||||
# When a node reboots return that node to the slurm queue as soon as it
|
||||
# becomes operative again.
|
||||
ReturnToService=2
|
||||
|
||||
# Track all processes by using a cgroup
|
||||
ProctrackType=proctrack/cgroup
|
||||
|
||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||
TaskPlugin=task/affinity,task/cgroup
|
||||
|
||||
# Power off unused nodes until they are requested
|
||||
SuspendProgram=${suspendProgram}
|
||||
SuspendTimeout=60
|
||||
ResumeProgram=${resumeProgram}
|
||||
ResumeTimeout=300
|
||||
SuspendExcNodes=hut
|
||||
|
||||
# Turn the nodes off after 1 hour of inactivity
|
||||
SuspendTime=3600
|
||||
|
||||
# Reduce port range so we can allow only this range in the firewall
|
||||
SrunPortRange=60000-61000
|
||||
|
||||
# Use cores as consumable resources. In SLURM terms, a core may have
|
||||
# multiple hardware threads (or CPUs).
|
||||
SelectType=select/cons_tres
|
||||
|
||||
# Ignore memory constraints and only use unused cores to share a node with
|
||||
# other jobs.
|
||||
SelectTypeParameters=CR_Core
|
||||
'';
|
||||
};
|
||||
|
||||
age.secrets.mungeKey = {
|
||||
file = ../../secrets/munge-key.age;
|
||||
owner = "munge";
|
||||
group = "munge";
|
||||
};
|
||||
|
||||
services.munge = {
|
||||
enable = true;
|
||||
password = config.age.secrets.mungeKey.path;
|
||||
};
|
||||
}
|
||||
|
||||
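With the power-save hooks above, idle nodes are switched off through IPMI after an hour. A hedged way to watch and exercise this by hand, with node and BMC names as used elsewhere in this configuration:

    # see whether slurm considers the node powered down (look for POWERED_DOWN / IDLE~)
    scontrol show node owl1 | grep -o 'State=[^ ]*'
    # ask the BMC directly, the same way suspend.sh/resume.sh do
    ipmitool -I lanplus -H owl1-ipmi -U "" -P "" chassis power status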
@@ -1,115 +0,0 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
let
|
||||
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Shutting down host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||
done
|
||||
'';
|
||||
|
||||
resumeProgram = pkgs.writeShellScript "resume.sh" ''
|
||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||
set -x
|
||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||
hosts=$(scontrol show hostnames $1)
|
||||
for host in $hosts; do
|
||||
echo Starting host: $host
|
||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||
done
|
||||
'';
|
||||
|
||||
in {
|
||||
services.slurm = {
|
||||
controlMachine = "apex";
|
||||
clusterName = "jungle";
|
||||
nodeName = [
|
||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
|
||||
];
|
||||
|
||||
partitionName = [
|
||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
];
|
||||
|
||||
# See slurm.conf(5) for more details about these options.
|
||||
extraConfig = ''
|
||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
||||
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
||||
# library in SLURM (--mpi=pmix). See more details here:
|
||||
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
||||
MpiDefault=pmix
|
||||
|
||||
# When a node reboots return that node to the slurm queue as soon as it
|
||||
# becomes operative again.
|
||||
ReturnToService=2
|
||||
|
||||
# Track all processes by using a cgroup
|
||||
ProctrackType=proctrack/cgroup
|
||||
|
||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||
TaskPlugin=task/affinity,task/cgroup
|
||||
|
||||
# Power off unused nodes until they are requested
|
||||
SuspendProgram=${suspendProgram}
|
||||
SuspendTimeout=60
|
||||
ResumeProgram=${resumeProgram}
|
||||
ResumeTimeout=300
|
||||
SuspendExcNodes=fox
|
||||
|
||||
# Turn the nodes off after 1 hour of inactivity
|
||||
SuspendTime=3600
|
||||
|
||||
# Reduce port range so we can allow only this range in the firewall
|
||||
SrunPortRange=60000-61000
|
||||
|
||||
# Use cores as consumable resources. In SLURM terms, a core may have
|
||||
# multiple hardware threads (or CPUs).
|
||||
SelectType=select/cons_tres
|
||||
|
||||
# Ignore memory constraints and only use unused cores to share a node with
|
||||
# other jobs.
|
||||
SelectTypeParameters=CR_Core
|
||||
|
||||
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
# This sets up the "extern" step into which ssh-launched processes will be
|
||||
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
|
||||
# when a task runs (srun) so we can ssh early.
|
||||
PrologFlags=Alloc,Contain,X11
|
||||
|
||||
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
|
||||
# adopted by the external step, similar to tasks running in regular steps
|
||||
# LaunchParameters=ulimit_pam_adopt
|
||||
SlurmdDebug=debug5
|
||||
#DebugFlags=Protocol,Cgroup
|
||||
'';
|
||||
|
||||
extraCgroupConfig = ''
|
||||
CgroupPlugin=cgroup/v2
|
||||
#ConstrainCores=yes
|
||||
'';
|
||||
};
|
||||
|
||||
# Place the slurm config in /etc as this will be required by PAM
|
||||
environment.etc.slurm.source = config.services.slurm.etcSlurm;
|
||||
|
||||
age.secrets.mungeKey = {
|
||||
file = ../../secrets/munge-key.age;
|
||||
owner = "munge";
|
||||
group = "munge";
|
||||
};
|
||||
|
||||
services.munge = {
|
||||
enable = true;
|
||||
password = config.age.secrets.mungeKey.path;
|
||||
};
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
./slurm-common.nix
|
||||
];
|
||||
|
||||
services.slurm.server.enable = true;
|
||||
|
||||
networking.firewall = {
|
||||
extraCommands = ''
|
||||
# Accept slurm connections to controller from compute nodes
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
|
||||
# Accept slurm connections from compute nodes for srun
|
||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||
|
||||
# Accept slurm connections to controller from fox (via wireguard)
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
|
||||
# Accept slurm connections from fox for srun (via wireguard)
|
||||
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
|
||||
'';
|
||||
};
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
{
|
||||
systemd.services."prometheus-upc-qaire-exporter" = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Restart = mkDefault "always";
|
||||
PrivateTmp = mkDefault true;
|
||||
WorkingDirectory = mkDefault "/tmp";
|
||||
DynamicUser = mkDefault true;
|
||||
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
{config, ...}:
|
||||
{
|
||||
age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age;
|
||||
age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age;
|
||||
|
||||
services.openvpn.servers = {
|
||||
# systemctl status openvpn-dac.service
|
||||
dac = {
|
||||
config = ''
|
||||
client
|
||||
dev tun
|
||||
proto tcp
|
||||
remote vpn.ac.upc.edu 1194
|
||||
remote vpn.ac.upc.edu 80
|
||||
resolv-retry infinite
|
||||
nobind
|
||||
persist-key
|
||||
persist-tun
|
||||
ca ${./vpn-dac/ca.crt}
|
||||
cert ${./vpn-dac/client.crt}
|
||||
# Only key needs to be secret
|
||||
key ${config.age.secrets.vpn-dac-client-key.path}
|
||||
remote-cert-tls server
|
||||
comp-lzo
|
||||
verb 3
|
||||
auth-user-pass ${config.age.secrets.vpn-dac-login.path}
|
||||
reneg-sec 0
|
||||
|
||||
# Only route fox-ipmi
|
||||
pull-filter ignore "route "
|
||||
route 147.83.35.27 255.255.255.255
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD
|
||||
VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx
|
||||
LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk
|
||||
MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM
|
||||
Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw
|
||||
Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE
|
||||
BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w
|
||||
KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi
|
||||
BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB
|
||||
QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu
|
||||
ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC
|
||||
F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D
|
||||
6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF
|
||||
LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9
|
||||
V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx
|
||||
L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH
|
||||
LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q
|
||||
MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL
|
||||
MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs
|
||||
b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu
|
||||
eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE
|
||||
AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh
|
||||
Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF
|
||||
AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1
|
||||
6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/
|
||||
oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q
|
||||
+oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3
|
||||
4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z
|
||||
TQQx5NqM7B5jjak5fauR3/oRD8XXsA==
|
||||
-----END CERTIFICATE-----
|
||||
@@ -1,100 +0,0 @@
|
||||
Certificate:
|
||||
Data:
|
||||
Version: 3 (0x2)
|
||||
Serial Number: 2 (0x2)
|
||||
Signature Algorithm: sha256WithRSAEncryption
|
||||
Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||
Validity
|
||||
Not Before: Jan 12 12:45:41 2016 GMT
|
||||
Not After : Jan 12 12:45:41 2046 GMT
|
||||
Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||
Subject Public Key Info:
|
||||
Public Key Algorithm: rsaEncryption
|
||||
Public-Key: (2048 bit)
|
||||
Modulus:
|
||||
00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64:
|
||||
c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6:
|
||||
56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7:
|
||||
19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58:
|
||||
48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83:
|
||||
dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9:
|
||||
2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa:
|
||||
6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5:
|
||||
b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f:
|
||||
60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec:
|
||||
76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa:
|
||||
dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d:
|
||||
5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb:
|
||||
6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e:
|
||||
fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00:
|
||||
17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93:
|
||||
71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55:
|
||||
f9:41
|
||||
Exponent: 65537 (0x10001)
|
||||
X509v3 extensions:
|
||||
X509v3 Basic Constraints:
|
||||
CA:FALSE
|
||||
Netscape Comment:
|
||||
Easy-RSA Generated Certificate
|
||||
X509v3 Subject Key Identifier:
|
||||
1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18
|
||||
X509v3 Authority Key Identifier:
|
||||
keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA
|
||||
DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||
serial:91:F5:D7:C3:C0:A6:4E:61
|
||||
|
||||
X509v3 Extended Key Usage:
|
||||
TLS Web Client Authentication
|
||||
X509v3 Key Usage:
|
||||
Digital Signature
|
||||
X509v3 Subject Alternative Name:
|
||||
DNS:client
|
||||
Signature Algorithm: sha256WithRSAEncryption
|
||||
42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea:
|
||||
3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7:
|
||||
26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0:
|
||||
31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1:
|
||||
01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a:
|
||||
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4:
|
||||
02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3:
|
||||
40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3:
|
||||
4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9:
|
||||
1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33:
|
||||
75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72:
|
||||
1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33:
|
||||
87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62:
|
||||
a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4:
|
||||
bf:23:f6:7d
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx
|
||||
EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK
|
||||
EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT
|
||||
G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN
|
||||
MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X
|
||||
DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw
|
||||
EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk
|
||||
VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB
|
||||
cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG
|
||||
A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN
|
||||
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S
|
||||
O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL
|
||||
Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc
|
||||
n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc
|
||||
IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl
|
||||
60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB
|
||||
o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu
|
||||
ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD
|
||||
GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx
|
||||
CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl
|
||||
bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1
|
||||
bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV
|
||||
BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA
|
||||
YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud
|
||||
DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo
|
||||
ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx
|
||||
3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk
|
||||
g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m
|
||||
lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck
|
||||
iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU
|
||||
CzHGPDS3/FoM5L8j9n0=
|
||||
-----END CERTIFICATE-----
|
||||
@@ -2,13 +2,12 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
../module/ceph.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
../module/slurm-firewall.nix
|
||||
../module/debuginfod.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Select this disk using the ID to avoid mismatches
|
||||
|
||||
@@ -2,13 +2,12 @@
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/ssf.nix
|
||||
../common/xeon.nix
|
||||
../module/ceph.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
../module/slurm-firewall.nix
|
||||
../module/debuginfod.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Select this disk using the ID to avoid mismatches
|
||||
|
||||
@@ -3,13 +3,6 @@
|
||||
{
|
||||
imports = [
|
||||
../common/base.nix
|
||||
../common/ssf/hosts.nix
|
||||
../module/emulation.nix
|
||||
../module/debuginfod.nix
|
||||
../module/nvidia.nix
|
||||
../eudy/kernel/perf.nix
|
||||
./wireguard.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Don't install Grub on the disk yet
|
||||
@@ -30,41 +23,14 @@
|
||||
address = "84.88.51.152";
|
||||
prefixLength = 25;
|
||||
} ];
|
||||
interfaces.enp5s0f1.ipv4.addresses = [ {
|
||||
address = "10.0.44.1";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
nat = {
|
||||
enable = true;
|
||||
internalInterfaces = [ "enp5s0f1" ];
|
||||
externalInterface = "eno0";
|
||||
};
|
||||
hosts = {
|
||||
"10.0.44.4" = [ "tent" ];
|
||||
"84.88.53.236" = [ "apex" ];
|
||||
};
|
||||
};
|
||||
|
||||
# Mount the NFS home
|
||||
fileSystems."/nfs/home" = {
|
||||
device = "10.106.0.30:/home";
|
||||
fsType = "nfs";
|
||||
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||
};
|
||||
|
||||
# Enable performance governor
|
||||
powerManagement.cpuFreqGovernor = "performance";
|
||||
|
||||
hardware.nvidia.open = false; # Maxwell is older than Turing architecture
|
||||
|
||||
services.openssh.settings.X11Forwarding = true;
|
||||
|
||||
services.prometheus.exporters.node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
# Configure Nvidia driver to use with CUDA
|
||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||
hardware.graphics.enable = true;
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
nixpkgs.config.nvidia.acceptLicense = true;
|
||||
services.xserver.videoDrivers = [ "nvidia" ];
|
||||
|
||||
users.motd = ''
|
||||
⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
networking.nat = {
|
||||
enable = true;
|
||||
enableIPv6 = false;
|
||||
externalInterface = "eno0";
|
||||
internalInterfaces = [ "wg0" ];
|
||||
};
|
||||
|
||||
networking.firewall = {
|
||||
allowedUDPPorts = [ 666 ];
|
||||
};
|
||||
|
||||
age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age;
|
||||
|
||||
# Enable WireGuard
|
||||
networking.wireguard.enable = true;
|
||||
networking.wireguard.interfaces = {
|
||||
wg0 = {
|
||||
ips = [ "10.106.0.236/24" ];
|
||||
listenPort = 666;
|
||||
privateKeyFile = config.age.secrets.wgRaccoon.path;
|
||||
# Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=
|
||||
peers = [
|
||||
{
|
||||
name = "fox";
|
||||
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
|
||||
allowedIPs = [ "10.106.0.1/32" ];
|
||||
endpoint = "fox.ac.upc.edu:666";
|
||||
persistentKeepalive = 25;
|
||||
}
|
||||
{
|
||||
name = "apex";
|
||||
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
|
||||
allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ];
|
||||
endpoint = "ssfhead.bsc.es:666";
|
||||
persistentKeepalive = 25;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
networking.hosts = {
|
||||
"10.106.0.1" = [ "fox.wg" ];
|
||||
"10.106.0.30" = [ "apex.wg" ];
|
||||
};
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
modules:
|
||||
http_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
preferred_ip_protocol: "ip4"
|
||||
follow_redirects: true
|
||||
valid_status_codes: [] # Defaults to 2xx
|
||||
method: GET
|
||||
icmp:
|
||||
prober: icmp
|
||||
timeout: 5s
|
||||
icmp:
|
||||
preferred_ip_protocol: "ip4"
|
||||
@@ -1,85 +0,0 @@
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/xeon.nix
|
||||
../common/ssf/hosts.nix
|
||||
../module/emulation.nix
|
||||
../module/debuginfod.nix
|
||||
./monitoring.nix
|
||||
./nginx.nix
|
||||
./nix-serve.nix
|
||||
./gitlab-runner.nix
|
||||
./gitea.nix
|
||||
../hut/public-inbox.nix
|
||||
../hut/msmtp.nix
|
||||
../module/p.nix
|
||||
../module/vpn-dac.nix
|
||||
../module/hut-substituter.nix
|
||||
];
|
||||
|
||||
# Select this disk using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
|
||||
|
||||
networking = {
|
||||
hostName = "tent";
|
||||
interfaces.eno1.ipv4.addresses = [
|
||||
{
|
||||
address = "10.0.44.4";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
|
||||
# Only BSC DNSs seem to be reachable from the office VLAN
|
||||
nameservers = [ "84.88.52.35" "84.88.52.36" ];
|
||||
search = [ "bsc.es" "ac.upc.edu" ];
|
||||
defaultGateway = "10.0.44.1";
|
||||
hosts = {
|
||||
"84.88.53.236" = [ "apex" ];
|
||||
"10.0.44.1" = [ "raccoon" ];
|
||||
};
|
||||
};
|
||||
|
||||
services.p.enable = true;
|
||||
|
||||
services.prometheus.exporters.node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
|
||||
boot.swraid = {
|
||||
enable = true;
|
||||
mdadmConf = ''
|
||||
DEVICE partitions
|
||||
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
|
||||
MAILADDR root
|
||||
'';
|
||||
};
|
||||
|
||||
fileSystems."/vault" = {
|
||||
device = "/dev/disk/by-label/vault";
|
||||
fsType = "ext4";
|
||||
};
|
||||
|
||||
# Make a /vault/$USER directory for each user.
|
||||
systemd.services.create-vault-dirs = let
|
||||
# Take only normal users in tent
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "local-fs.target" ];
|
||||
after = [ "local-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
|
||||
# disable automatic garbage collector
|
||||
nix.gc.automatic = lib.mkForce false;
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
services.gitea = {
|
||||
enable = true;
|
||||
appName = "Gitea in the jungle";
|
||||
|
||||
settings = {
|
||||
server = {
|
||||
ROOT_URL = "https://jungle.bsc.es/git/";
|
||||
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
|
||||
LANDING_PAGE = "explore";
|
||||
};
|
||||
metrics.ENABLED = true;
|
||||
service = {
|
||||
DISABLE_REGISTRATION = true;
|
||||
REGISTER_MANUAL_CONFIRM = true;
|
||||
ENABLE_NOTIFY_MAIL = true;
|
||||
};
|
||||
log.LEVEL = "Warn";
|
||||
|
||||
mailer = {
|
||||
ENABLED = true;
|
||||
FROM = "jungle-robot@bsc.es";
|
||||
PROTOCOL = "sendmail";
|
||||
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
|
||||
SENDMAIL_ARGS = "--";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
|
||||
{
|
||||
age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age;
|
||||
age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age;
|
||||
age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age;
|
||||
|
||||
services.gitlab-runner = let sec = config.age.secrets; in {
|
||||
enable = true;
|
||||
settings.concurrent = 5;
|
||||
services = {
|
||||
# For gitlab.pm.bsc.es
|
||||
gitlab-pm-shell = {
|
||||
executor = "shell";
|
||||
environmentVariables = {
|
||||
SHELL = "${pkgs.bash}/bin/bash";
|
||||
};
|
||||
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path;
|
||||
preGetSourcesScript = pkgs.writeScript "setup" ''
|
||||
echo "This is the preGetSources script running, brace for impact"
|
||||
env
|
||||
'';
|
||||
};
|
||||
gitlab-pm-docker = {
|
||||
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path;
|
||||
executor = "docker";
|
||||
dockerImage = "debian:stable";
|
||||
};
|
||||
|
||||
# For gitlab.bsc.es
|
||||
gitlab-bsc-docker = {
|
||||
# gitlab.bsc.es still uses the old token mechanism
|
||||
registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path;
|
||||
tagList = [ "docker" "tent" "nix" ];
|
||||
executor = "docker";
|
||||
dockerImage = "alpine";
|
||||
dockerVolumes = [
|
||||
"/nix/store:/nix/store:ro"
|
||||
"/nix/var/nix/db:/nix/var/nix/db:ro"
|
||||
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
|
||||
];
|
||||
dockerDisableCache = true;
|
||||
registrationFlags = [
|
||||
# Increase build log length to 64 MiB
|
||||
"--output-limit 65536"
|
||||
];
|
||||
preBuildScript = pkgs.writeScript "setup-container" ''
|
||||
mkdir -p -m 0755 /nix/var/log/nix/drvs
|
||||
mkdir -p -m 0755 /nix/var/nix/gcroots
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles
|
||||
mkdir -p -m 0755 /nix/var/nix/temproots
|
||||
mkdir -p -m 0755 /nix/var/nix/userpool
|
||||
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
|
||||
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
|
||||
mkdir -p -m 0700 "$HOME/.nix-defexpr"
|
||||
mkdir -p -m 0700 "$HOME/.ssh"
|
||||
cat >> "$HOME/.ssh/known_hosts" << EOF
|
||||
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
|
||||
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
|
||||
EOF
|
||||
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
|
||||
# Required to load SSL certificate paths
|
||||
. ${pkgs.cacert}/nix-support/setup-hook
|
||||
'';
|
||||
environmentVariables = {
|
||||
ENV = "/etc/profile";
|
||||
USER = "root";
|
||||
NIX_REMOTE = "daemon";
|
||||
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.gitlab-runner.serviceConfig = {
|
||||
DynamicUser = lib.mkForce false;
|
||||
User = "gitlab-runner";
|
||||
Group = "gitlab-runner";
|
||||
ExecStart = lib.mkForce
|
||||
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
|
||||
};
|
||||
|
||||
users.users.gitlab-runner = {
|
||||
uid = config.ids.uids.gitlab-runner;
|
||||
home = "/var/lib/gitlab-runner";
|
||||
description = "Gitlab Runner";
|
||||
group = "gitlab-runner";
|
||||
extraGroups = [ "docker" ];
|
||||
createHome = true;
|
||||
};
|
||||
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
|
||||
}
|
||||
@@ -1,217 +0,0 @@
{ config, lib, pkgs, ... }:

{
imports = [
../module/meteocat-exporter.nix
../module/upc-qaire-exporter.nix
../module/nix-daemon-exporter.nix
];

age.secrets.grafanaJungleRobotPassword = {
file = ../../secrets/jungle-robot-password.age;
owner = "grafana";
mode = "400";
};

services.grafana = {
enable = true;
settings = {
server = {
domain = "jungle.bsc.es";
root_url = "%(protocol)s://%(domain)s/grafana";
serve_from_sub_path = true;
http_port = 2342;
http_addr = "127.0.0.1";
};
smtp = {
enabled = true;
from_address = "jungle-robot@bsc.es";
user = "jungle-robot";
# Read the password from a file, which is only readable by grafana user
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
host = "mail.bsc.es:465";
startTLS_policy = "NoStartTLS";
};
feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
};
};

services.prometheus = {
enable = true;
port = 9001;
retentionTime = "5y";
listenAddress = "127.0.0.1";
};

# We need access to the devices to monitor the disk space
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";

# Credentials for IPMI exporter
age.secrets.ipmiYml = {
file = ../../secrets/ipmi.yml.age;
owner = "ipmi-exporter";
};

# Create an IPMI group and assign the ipmi0 device
users.groups.ipmi = {};
services.udev.extraRules = ''
SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660"
'';

# Add a new ipmi-exporter user that can read the ipmi0 device
users.users.ipmi-exporter = {
isSystemUser = true;
group = "ipmi";
};

# Disable dynamic user so we have the ipmi-exporter user available for the credentials
systemd.services.prometheus-ipmi-exporter.serviceConfig = {
DynamicUser = lib.mkForce false;
PrivateDevices = lib.mkForce false;
User = lib.mkForce "ipmi-exporter";
Group = lib.mkForce "ipmi";
RestrictNamespaces = lib.mkForce false;
# Fake uid to 0 so it shuts up
ExecStart = let
cfg = config.services.prometheus.exporters.ipmi;
in lib.mkForce (lib.concatStringsSep " " ([
"${pkgs.util-linux}/bin/unshare --map-user 0"
"${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter"
"--web.listen-address ${cfg.listenAddress}:${toString cfg.port}"
"--config.file ${lib.escapeShellArg cfg.configFile}"
] ++ cfg.extraFlags));
};

services.prometheus = {
exporters = {
ipmi = {
enable = true;
configFile = config.age.secrets.ipmiYml.path;
#extraFlags = [ "--log.level=debug" ];
listenAddress = "127.0.0.1";
};
node = {
enable = true;
enabledCollectors = [ "logind" ];
port = 9002;
listenAddress = "127.0.0.1";
};
blackbox = {
enable = true;
listenAddress = "127.0.0.1";
configFile = ./blackbox.yml;
};
};

scrapeConfigs = [
{
job_name = "local";
static_configs = [{
targets = [
"127.0.0.1:9002" # Node exporter
#"127.0.0.1:9115" # Blackbox exporter
"127.0.0.1:9290" # IPMI exporter for local node
"127.0.0.1:9928" # UPC Qaire custom exporter
"127.0.0.1:9929" # Meteocat custom exporter
"127.0.0.1:9999" # Nix-daemon custom exporter
];
}];
}
{
job_name = "blackbox-http";
metrics_path = "/probe";
params = { module = [ "http_2xx" ]; };
static_configs = [{
targets = [
"https://www.google.com/robots.txt"
"https://pm.bsc.es/"
"https://pm.bsc.es/gitlab/"
"https://jungle.bsc.es/"
"https://gitlab.bsc.es/"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "blackbox-icmp";
metrics_path = "/probe";
params = { module = [ "icmp" ]; };
static_configs = [{
targets = [
"1.1.1.1"
"8.8.8.8"
"ssfhead"
"raccoon"
"anella-bsc.cesca.cat"
"upc-anella.cesca.cat"
"fox.ac.upc.edu"
"fox-ipmi.ac.upc.edu"
"arenys5.ac.upc.edu"
"arenys0-2.ac.upc.edu"
"epi01.bsc.es"
"axle.bsc.es"
];
}];
relabel_configs = [
{
# Takes the address and sets it in the "target=<xyz>" URL parameter
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
# Sets the "instance" label with the remote host we are querying
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
# Shows the host target address instead of the blackbox address
target_label = "__address__";
replacement = "127.0.0.1:9115";
}
];
}
{
job_name = "ipmi-raccoon";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "raccoon-ipmi" ];
module = [ "raccoon" ];
};
}
{
job_name = "ipmi-fox";
metrics_path = "/ipmi";
static_configs = [
{ targets = [ "127.0.0.1:9290" ]; }
];
params = {
target = [ "fox-ipmi.ac.upc.edu" ];
module = [ "fox" ];
};
}
];
};
}
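Grafana (port 2342) and Prometheus (port 9001) are configured independently above; a minimal sketch of provisioning the local Prometheus as a Grafana datasource, assuming it is not already provisioned elsewhere in this repository:

```nix
# Sketch only: declaratively add the local Prometheus as the default
# Grafana datasource. The URL matches services.prometheus.port above.
services.grafana.provision.datasources.settings.datasources = [
  {
    name = "Prometheus";
    type = "prometheus";
    url = "http://127.0.0.1:9001";
    isDefault = true;
  }
];
```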
@@ -1,79 +0,0 @@
{ theFlake, pkgs, ... }:
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
rm -rf public/
hugo
'';
installPhase = ''
cp -r public $out
'';
# Don't mess doc/
dontFixup = true;
};
in
{
networking.firewall.allowedTCPPorts = [ 80 ];
services.nginx = {
enable = true;
virtualHosts."jungle.bsc.es" = {
root = "${website}";
listen = [
{
addr = "0.0.0.0";
port = 80;
}
];
extraConfig = ''
set_real_ip_from 127.0.0.1;
set_real_ip_from 84.88.52.107;
real_ip_recursive on;
real_ip_header X-Forwarded-For;

location /git {
rewrite ^/git$ / break;
rewrite ^/git/(.*) /$1 break;
proxy_pass http://127.0.0.1:3000;
proxy_redirect http:// $scheme://;
}
location /cache {
rewrite ^/cache/(.*) /$1 break;
proxy_pass http://127.0.0.1:5000;
proxy_redirect http:// $scheme://;
}
location /lists {
proxy_pass http://127.0.0.1:8081;
proxy_redirect http:// $scheme://;
}
location /grafana {
proxy_pass http://127.0.0.1:2342;
proxy_redirect http:// $scheme://;
proxy_set_header Host $host;
# Websockets
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ ^/~(.+?)(/.*)?$ {
alias /vault/home/$1/public_html$2;
index index.html index.htm;
autoindex on;
absolute_redirect off;
}
location /p/ {
alias /var/lib/p/;
}
location /pub/ {
alias /vault/pub/;
}
'';
};
};
}
@@ -1,16 +0,0 @@
{ config, ... }:

{
age.secrets.nixServe.file = ../../secrets/nix-serve.age;

services.nix-serve = {
enable = true;
# Only listen locally, as we serve it via ssh
bindAddress = "127.0.0.1";
port = 5000;

secretKeyFile = config.age.secrets.nixServe.path;
# Public key:
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
};
}
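The public key listed above is what clients need to trust this cache; a minimal client-side sketch, assuming the cache is reached through the /cache proxy from the nginx configuration (the hut-substituter module imported by other hosts likely does something equivalent, and the scheme depends on whatever fronting proxy terminates TLS):

```nix
# Sketch only: consume the nix-serve binary cache from another machine.
nix.settings = {
  substituters = [ "https://jungle.bsc.es/cache" ];
  trusted-public-keys = [
    "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0="
  ];
};
```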
@@ -1,33 +0,0 @@
{ lib, ... }:

{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
];

# Select this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";

# No swap, there is plenty of RAM
swapDevices = lib.mkForce [];

# Users with sudo access
users.groups.wheel.members = [ "abonerib" "anavarro" ];

# Run julia installed with juliaup using julia's own libraries:
# NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
programs.nix-ld.enable = true;

networking = {
hostName = "weasel";
interfaces.eno1.ipv4.addresses = [ {
address = "10.0.40.6";
prefixLength = 24;
} ];
interfaces.ibp5s0.ipv4.addresses = [ {
address = "10.0.42.6";
prefixLength = 24;
} ];
};
}
overlay.nix
@@ -1,149 +0,0 @@
final: /* Future last stage */
prev: /* Previous stage */

with final.lib;

let
callPackage = final.callPackage;

bscPkgs = {
amd-uprof = prev.callPackage ./pkgs/amd-uprof/default.nix { };
bench6 = callPackage ./pkgs/bench6/default.nix { };
bigotes = callPackage ./pkgs/bigotes/default.nix { };
clangOmpss2 = callPackage ./pkgs/llvm-ompss2/default.nix { };
clangOmpss2Nanos6 = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nanos6; };
clangOmpss2Nodes = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmp; };
clangOmpss2NodesOmpv = callPackage ./pkgs/llvm-ompss2/default.nix { ompss2rt = final.nodes; openmp = final.openmpv; };
clangOmpss2Unwrapped = callPackage ./pkgs/llvm-ompss2/clang.nix { };
cudainfo = prev.callPackage ./pkgs/cudainfo/default.nix { };
#extrae = callPackage ./pkgs/extrae/default.nix { }; # Broken and outdated
gpi-2 = callPackage ./pkgs/gpi-2/default.nix { };
intelPackages_2023 = callPackage ./pkgs/intel-oneapi/2023.nix { };
jemallocNanos6 = callPackage ./pkgs/nanos6/jemalloc.nix { };
# FIXME: Extend this to all linuxPackages variants. Open problem, see:
# https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
linuxPackages = prev.linuxPackages.extend (_final: _prev: {
amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { };
});
linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
amd-uprof-driver = _prev.callPackage ./pkgs/amd-uprof/driver.nix { };
});
lmbench = callPackage ./pkgs/lmbench/default.nix { };
mcxx = callPackage ./pkgs/mcxx/default.nix { };
meteocat-exporter = prev.callPackage ./pkgs/meteocat-exporter/default.nix { };
mpi = final.mpich; # Set MPICH as default
mpich = callPackage ./pkgs/mpich/default.nix { mpich = prev.mpich; };
nanos6 = callPackage ./pkgs/nanos6/default.nix { };
nanos6Debug = final.nanos6.override { enableDebug = true; };
nixtools = callPackage ./pkgs/nixtools/default.nix { };
# Broken because of pkgsStatic.libcap
# See: https://github.com/NixOS/nixpkgs/pull/268791
#nix-wrap = callPackage ./pkgs/nix-wrap/default.nix { };
nodes = callPackage ./pkgs/nodes/default.nix { };
nosv = callPackage ./pkgs/nosv/default.nix { };
openmp = callPackage ./pkgs/llvm-ompss2/openmp.nix { monorepoSrc = final.clangOmpss2Unwrapped.src; version = final.clangOmpss2Unwrapped.version; };
openmpv = final.openmp.override { enableNosv = true; enableOvni = true; };
osumb = callPackage ./pkgs/osu/default.nix { };
ovni = callPackage ./pkgs/ovni/default.nix { };
ovniGit = final.ovni.override { useGit = true; };
paraverKernel = callPackage ./pkgs/paraver/kernel.nix { };
prometheus-slurm-exporter = prev.callPackage ./pkgs/slurm-exporter/default.nix { };
#pscom = callPackage ./pkgs/parastation/pscom.nix { }; # Unmaintained
#psmpi = callPackage ./pkgs/parastation/psmpi.nix { }; # Unmaintained
sonar = callPackage ./pkgs/sonar/default.nix { };
stdenvClangOmpss2 = final.stdenv.override { cc = final.clangOmpss2; allowedRequisites = null; };
stdenvClangOmpss2Nanos6 = final.stdenv.override { cc = final.clangOmpss2Nanos6; allowedRequisites = null; };
stdenvClangOmpss2Nodes = final.stdenv.override { cc = final.clangOmpss2Nodes; allowedRequisites = null; };
stdenvClangOmpss2NodesOmpv = final.stdenv.override { cc = final.clangOmpss2NodesOmpv; allowedRequisites = null; };
tagaspi = callPackage ./pkgs/tagaspi/default.nix { };
tampi = callPackage ./pkgs/tampi/default.nix { };
upc-qaire-exporter = prev.callPackage ./pkgs/upc-qaire-exporter/default.nix { };
wxparaver = callPackage ./pkgs/paraver/default.nix { };
};

tests = rec {
hwloc = callPackage ./test/bugs/hwloc.nix { };
#sigsegv = callPackage ./test/reproducers/sigsegv.nix { };
hello-c = callPackage ./test/compilers/hello-c.nix { };
hello-cpp = callPackage ./test/compilers/hello-cpp.nix { };
lto = callPackage ./test/compilers/lto.nix { };
asan = callPackage ./test/compilers/asan.nix { };
intel2023-icx-c = hello-c.override { stdenv = final.intelPackages_2023.stdenv; };
intel2023-icc-c = hello-c.override { stdenv = final.intelPackages_2023.stdenv-icc; };
intel2023-icx-cpp = hello-cpp.override { stdenv = final.intelPackages_2023.stdenv; };
intel2023-icc-cpp = hello-cpp.override { stdenv = final.intelPackages_2023.stdenv-icc; };
intel2023-ifort = callPackage ./test/compilers/hello-f.nix {
stdenv = final.intelPackages_2023.stdenv-ifort;
};
clangOmpss2-lto = lto.override { stdenv = final.stdenvClangOmpss2Nanos6; };
clangOmpss2-asan = asan.override { stdenv = final.stdenvClangOmpss2Nanos6; };
clangOmpss2-task = callPackage ./test/compilers/ompss2.nix {
stdenv = final.stdenvClangOmpss2Nanos6;
};
clangNodes-task = callPackage ./test/compilers/ompss2.nix {
stdenv = final.stdenvClangOmpss2Nodes;
};
clangNosvOpenmp-task = callPackage ./test/compilers/clang-openmp.nix {
stdenv = final.stdenvClangOmpss2Nodes;
};
clangNosvOmpv-nosv = callPackage ./test/compilers/clang-openmp-nosv.nix {
stdenv = final.stdenvClangOmpss2NodesOmpv;
};
clangNosvOmpv-ld = callPackage ./test/compilers/clang-openmp-ld.nix {
stdenv = final.stdenvClangOmpss2NodesOmpv;
};
};

# For now, only build toplevel packages in CI/Hydra
pkgsTopLevel = filterAttrs (_: isDerivation) bscPkgs;

# A native build on that platform doesn't imply the cross build works
canCrossCompile = platform: pkg:
(isDerivation pkg) &&
# Must be defined explicitly
(pkg.meta.cross or false) &&
(meta.availableOn platform pkg);

# For now only RISC-V
crossSet = { riscv64 = final.pkgsCross.riscv64.bsc.pkgsTopLevel; };

buildList = name: paths:
final.runCommandLocal name { } ''
printf '%s\n' ${toString paths} | tee $out
'';

buildList' = name: paths:
final.runCommandLocal name { } ''
deps="${toString paths}"
cat $deps
printf '%s\n' $deps >$out
'';

pkgsList = buildList "ci-pkgs" (builtins.attrValues pkgsTopLevel);
testsList = buildList "ci-tests" (collect isDerivation tests);
allList = buildList' "ci-all" [ pkgsList testsList ];
# For now only RISC-V
crossList = buildList "ci-cross"
(filter
(canCrossCompile final.pkgsCross.riscv64.stdenv.hostPlatform)
(builtins.attrValues crossSet.riscv64));

in bscPkgs // {
# Prevent accidental usage of bsc-ci attribute
bsc-ci = throw "the bsc-ci attribute is deprecated, use bsc.ci";

# Internal for our CI tests
bsc = {
# CI targets for nix build
ci = { pkgs = pkgsList; tests = testsList; all = allList; cross = crossList; };

# Direct access to package sets
tests = tests;
pkgs = bscPkgs;
pkgsTopLevel = pkgsTopLevel;
cross = crossSet;

# Hydra uses attribute sets of pkgs
hydraJobs = { tests = tests; pkgs = pkgsTopLevel; cross = crossSet; };
};
}
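The overlay exposes its CI entry points under the bsc attribute; a minimal usage sketch, assuming a plain nixpkgs import rather than whatever flake plumbing the repository actually uses:

```nix
# Sketch only: apply overlay.nix and evaluate the aggregated CI list,
# a text file containing the store paths of all top-level packages and tests.
let
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
  pkgs.bsc.ci.all
```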
@@ -1,89 +0,0 @@
|
||||
{ stdenv
|
||||
, lib
|
||||
, curl
|
||||
, cacert
|
||||
, runCommandLocal
|
||||
, autoPatchelfHook
|
||||
, elfutils
|
||||
, glib
|
||||
, libGL
|
||||
, ncurses5
|
||||
, xorg
|
||||
, zlib
|
||||
, libxkbcommon
|
||||
, freetype
|
||||
, fontconfig
|
||||
, libGLU
|
||||
, dbus
|
||||
, rocmPackages
|
||||
, libxcrypt-legacy
|
||||
, numactl
|
||||
, radare2
|
||||
}:
|
||||
|
||||
let
|
||||
version = "5.1.701";
|
||||
tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";
|
||||
|
||||
# NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes.
|
||||
uprofSrc = runCommandLocal tarball {
|
||||
nativeBuildInputs = [ curl ];
|
||||
outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
|
||||
SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
|
||||
} ''
|
||||
curl \
|
||||
-o $out \
|
||||
'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
|
||||
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
|
||||
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
|
||||
-H 'Accept-Language: en-US,en;q=0.5' \
|
||||
-H 'Accept-Encoding: gzip, deflate, br, zstd' \
|
||||
-H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
|
||||
'';
|
||||
|
||||
in
|
||||
stdenv.mkDerivation {
|
||||
pname = "AMD-uProf";
|
||||
inherit version;
|
||||
src = uprofSrc;
|
||||
dontStrip = true;
|
||||
phases = [ "installPhase" "fixupPhase" ];
|
||||
nativeBuildInputs = [ autoPatchelfHook radare2 ];
|
||||
buildInputs = [
|
||||
stdenv.cc.cc.lib
|
||||
ncurses5
|
||||
elfutils
|
||||
glib
|
||||
libGL
|
||||
libGLU
|
||||
libxcrypt-legacy
|
||||
xorg.libX11
|
||||
xorg.libXext
|
||||
xorg.libXi
|
||||
xorg.libXmu
|
||||
xorg.libxcb
|
||||
xorg.xcbutilwm
|
||||
xorg.xcbutilrenderutil
|
||||
xorg.xcbutilkeysyms
|
||||
xorg.xcbutilimage
|
||||
fontconfig.lib
|
||||
libxkbcommon
|
||||
zlib
|
||||
freetype
|
||||
dbus
|
||||
rocmPackages.rocprofiler
|
||||
numactl
|
||||
];
|
||||
installPhase = ''
|
||||
set -x
|
||||
mkdir -p $out
|
||||
tar -x -v -C $out --strip-components=1 -f $src
|
||||
rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
|
||||
patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
|
||||
patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
|
||||
echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c -
|
||||
radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm
|
||||
patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm
|
||||
set +x
|
||||
'';
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
{ stdenv
|
||||
, lib
|
||||
, amd-uprof
|
||||
, kernel
|
||||
, runCommandLocal
|
||||
}:
|
||||
|
||||
let
|
||||
version = amd-uprof.version;
|
||||
tarball = amd-uprof.src;
|
||||
in stdenv.mkDerivation {
|
||||
pname = "AMDPowerProfilerDriver";
|
||||
inherit version;
|
||||
src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
|
||||
set -x
|
||||
tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz
|
||||
mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out
|
||||
set +x
|
||||
'';
|
||||
hardeningDisable = [ "pic" "format" ];
|
||||
nativeBuildInputs = kernel.moduleBuildDependencies;
|
||||
patches = [ ./makefile.patch ./hrtimer.patch ];
|
||||
makeFlags = [
|
||||
"KERNEL_VERSION=${kernel.modDirVersion}"
|
||||
"KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
|
||||
"INSTALL_MOD_PATH=$(out)"
|
||||
];
|
||||
meta = {
|
||||
description = "AMD Power Profiler Driver";
|
||||
homepage = "https://www.amd.com/es/developer/uprof.html";
|
||||
platforms = lib.platforms.linux;
|
||||
};
|
||||
}
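The overlay above extends linuxPackages with this driver; a host that wants AMD uProf still has to install the tool and ship the kernel module. A minimal wiring sketch, where how the repository actually enables it on a host is an assumption:

```nix
# Sketch only: make the profiler and its kernel module available on a host.
# The module still has to be loaded (modprobe) under whatever name the
# AMDPowerProfiler driver registers; that name is not shown in this file.
{ config, pkgs, ... }:
{
  environment.systemPackages = [ pkgs.amd-uprof ];
  boot.extraModulePackages = [ config.boot.kernelPackages.amd-uprof-driver ];
}
```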
|
||||
@@ -1,31 +0,0 @@
|
||||
--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200
|
||||
+++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200
|
||||
@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
|
||||
|
||||
DRVPRINT("pTimerConfig(%p)", pTimerConfig);
|
||||
|
||||
- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||
+ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||
}
|
||||
|
||||
int PmcSetupTimer(ClientContext* pClientCtx)
|
||||
@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
|
||||
{
|
||||
/* Interval in ms */
|
||||
pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
|
||||
- pTimerConfig->m_hrTimer.function = PmcTimerCallback;
|
||||
|
||||
DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
|
||||
}
|
||||
--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200
|
||||
+++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200
|
||||
@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
|
||||
pCoreClientData = &per_cpu(g_coreClientData, cpu);
|
||||
|
||||
// initialize HR timer
|
||||
- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||
- pCoreClientData->m_hrTimer.function = &HrTimerCallback;
|
||||
+ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||
|
||||
return;
|
||||
} // InitHrTimer
|
||||
@@ -1,10 +0,0 @@
|
||||
# Patch the arguments of the call to sym std::string::find(char const*, unsigned long, unsigned long)
# so the OS check matches NixOS:
#
# Change OS name to NixOS
wz NixOS @ 0x00550a43
# And set the length to 5 characters
wa mov ecx, 5 @0x00517930
#
# Then change the argument to dlopen() so it only uses libnuma.so
wz libnuma.so @ 0x00562940
||||
@@ -1,66 +0,0 @@
|
||||
--- a/Makefile 2025-06-19 20:36:49.346693267 +0200
|
||||
+++ b/Makefile 2025-06-19 20:42:29.778088660 +0200
|
||||
@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
|
||||
MODULE_NAME_KO=$(MODULE_NAME).ko
|
||||
|
||||
# check is module inserted
|
||||
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||
+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||
|
||||
# check pcore dkms status
|
||||
PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
|
||||
@@ -50,7 +50,7 @@ endif
|
||||
# “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
|
||||
G_VERSION=9
|
||||
K_VERSION=5
|
||||
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
|
||||
+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
|
||||
GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
|
||||
ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
|
||||
ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
|
||||
@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe
|
||||
|
||||
# make
|
||||
all:
|
||||
- @chmod a+x ./AMDPPcert.sh
|
||||
- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
|
||||
- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||
- if [ $$SIGSTATUS1 -eq 1 ]; then \
|
||||
- exit 1; \
|
||||
- fi
|
||||
- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
|
||||
- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||
- if [ $$SIGSTATUS3 -eq 0 ]; then \
|
||||
- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
|
||||
- fi
|
||||
+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
|
||||
|
||||
# make clean
|
||||
clean:
|
||||
@@ -84,23 +74,9 @@ clean:
|
||||
|
||||
# make install
|
||||
install:
|
||||
- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
|
||||
- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
|
||||
- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
|
||||
- @depmod -a
|
||||
- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
|
||||
- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
|
||||
- rmmod $(MODULE_NAME);\
|
||||
- fi
|
||||
- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
|
||||
- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
|
||||
- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||
- if [ $$SIGSTATUS1 -eq 0 ]; then \
|
||||
- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
|
||||
- exit 1; \
|
||||
- else \
|
||||
- cat $(PWD)/sign_status1; \
|
||||
- fi
|
||||
+ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||
+ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||
+
|
||||
# make dkms
|
||||
dkms:
|
||||
@chmod a+x ./AMDPPcert.sh
|
||||
@@ -1,61 +0,0 @@
|
||||
{
|
||||
stdenv
|
||||
, bigotes
|
||||
, cmake
|
||||
, clangOmpss2
|
||||
, openmp
|
||||
, openmpv
|
||||
, nanos6
|
||||
, nodes
|
||||
, nosv
|
||||
, mpi
|
||||
, tampi
|
||||
, openblas
|
||||
, ovni
|
||||
, gitBranch ? "master"
|
||||
, gitURL ? "ssh://git@bscpm04.bsc.es/rarias/bench6.git"
|
||||
, gitCommit ? "bf29a53113737c3aa74d2fe3d55f59868faea7b4"
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "bench6";
|
||||
version = "${src.shortRev}";
|
||||
|
||||
src = builtins.fetchGit {
|
||||
url = gitURL;
|
||||
ref = gitBranch;
|
||||
rev = gitCommit;
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clangOmpss2
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
bigotes
|
||||
openmp
|
||||
openmpv
|
||||
nanos6
|
||||
nodes
|
||||
nosv
|
||||
mpi
|
||||
tampi
|
||||
openblas
|
||||
openblas.dev
|
||||
ovni
|
||||
];
|
||||
|
||||
env = {
|
||||
NANOS6_HOME = nanos6;
|
||||
NODES_HOME = nodes;
|
||||
NOSV_HOME = nosv;
|
||||
};
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_C_COMPILER=clang"
|
||||
"-DCMAKE_CXX_COMPILER=clang++"
|
||||
];
|
||||
hardeningDisable = [ "all" ];
|
||||
dontStrip = true;
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
stdenv
|
||||
, fetchFromGitHub
|
||||
, cmake
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation {
|
||||
pname = "bigotes";
|
||||
version = "9dce13";
|
||||
src = fetchFromGitHub {
|
||||
owner = "rodarima";
|
||||
repo = "bigotes";
|
||||
rev = "9dce13446a8da30bea552d569d260d54e0188518";
|
||||
sha256 = "sha256-ktxM3pXiL8YXSK+/IKWYadijhYXqGoLY6adLk36iigE=";
|
||||
};
|
||||
nativeBuildInputs = [ cmake ];
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
HOSTCXX ?= g++
NVCC := nvcc -ccbin $(HOSTCXX)
CXXFLAGS := -m64

# Target rules
all: cudainfo

cudainfo: cudainfo.cpp
	$(NVCC) $(CXXFLAGS) -o $@ $<

clean:
	rm -f cudainfo cudainfo.o
||||
@@ -1,600 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
|
||||
|
||||
// Shared Utilities (QA Testing)
|
||||
|
||||
// std::system includes
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
||||
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
|
||||
|
||||
// CUDA Runtime error messages
|
||||
#ifdef __DRIVER_TYPES_H__
|
||||
static const char *_cudaGetErrorEnum(cudaError_t error)
|
||||
{
|
||||
switch (error)
|
||||
{
|
||||
case cudaSuccess:
|
||||
return "cudaSuccess";
|
||||
|
||||
case cudaErrorMissingConfiguration:
|
||||
return "cudaErrorMissingConfiguration";
|
||||
|
||||
case cudaErrorMemoryAllocation:
|
||||
return "cudaErrorMemoryAllocation";
|
||||
|
||||
case cudaErrorInitializationError:
|
||||
return "cudaErrorInitializationError";
|
||||
|
||||
case cudaErrorLaunchFailure:
|
||||
return "cudaErrorLaunchFailure";
|
||||
|
||||
case cudaErrorPriorLaunchFailure:
|
||||
return "cudaErrorPriorLaunchFailure";
|
||||
|
||||
case cudaErrorLaunchTimeout:
|
||||
return "cudaErrorLaunchTimeout";
|
||||
|
||||
case cudaErrorLaunchOutOfResources:
|
||||
return "cudaErrorLaunchOutOfResources";
|
||||
|
||||
case cudaErrorInvalidDeviceFunction:
|
||||
return "cudaErrorInvalidDeviceFunction";
|
||||
|
||||
case cudaErrorInvalidConfiguration:
|
||||
return "cudaErrorInvalidConfiguration";
|
||||
|
||||
case cudaErrorInvalidDevice:
|
||||
return "cudaErrorInvalidDevice";
|
||||
|
||||
case cudaErrorInvalidValue:
|
||||
return "cudaErrorInvalidValue";
|
||||
|
||||
case cudaErrorInvalidPitchValue:
|
||||
return "cudaErrorInvalidPitchValue";
|
||||
|
||||
case cudaErrorInvalidSymbol:
|
||||
return "cudaErrorInvalidSymbol";
|
||||
|
||||
case cudaErrorMapBufferObjectFailed:
|
||||
return "cudaErrorMapBufferObjectFailed";
|
||||
|
||||
case cudaErrorUnmapBufferObjectFailed:
|
||||
return "cudaErrorUnmapBufferObjectFailed";
|
||||
|
||||
case cudaErrorInvalidHostPointer:
|
||||
return "cudaErrorInvalidHostPointer";
|
||||
|
||||
case cudaErrorInvalidDevicePointer:
|
||||
return "cudaErrorInvalidDevicePointer";
|
||||
|
||||
case cudaErrorInvalidTexture:
|
||||
return "cudaErrorInvalidTexture";
|
||||
|
||||
case cudaErrorInvalidTextureBinding:
|
||||
return "cudaErrorInvalidTextureBinding";
|
||||
|
||||
case cudaErrorInvalidChannelDescriptor:
|
||||
return "cudaErrorInvalidChannelDescriptor";
|
||||
|
||||
case cudaErrorInvalidMemcpyDirection:
|
||||
return "cudaErrorInvalidMemcpyDirection";
|
||||
|
||||
case cudaErrorAddressOfConstant:
|
||||
return "cudaErrorAddressOfConstant";
|
||||
|
||||
case cudaErrorTextureFetchFailed:
|
||||
return "cudaErrorTextureFetchFailed";
|
||||
|
||||
case cudaErrorTextureNotBound:
|
||||
return "cudaErrorTextureNotBound";
|
||||
|
||||
case cudaErrorSynchronizationError:
|
||||
return "cudaErrorSynchronizationError";
|
||||
|
||||
case cudaErrorInvalidFilterSetting:
|
||||
return "cudaErrorInvalidFilterSetting";
|
||||
|
||||
case cudaErrorInvalidNormSetting:
|
||||
return "cudaErrorInvalidNormSetting";
|
||||
|
||||
case cudaErrorMixedDeviceExecution:
|
||||
return "cudaErrorMixedDeviceExecution";
|
||||
|
||||
case cudaErrorCudartUnloading:
|
||||
return "cudaErrorCudartUnloading";
|
||||
|
||||
case cudaErrorUnknown:
|
||||
return "cudaErrorUnknown";
|
||||
|
||||
case cudaErrorNotYetImplemented:
|
||||
return "cudaErrorNotYetImplemented";
|
||||
|
||||
case cudaErrorMemoryValueTooLarge:
|
||||
return "cudaErrorMemoryValueTooLarge";
|
||||
|
||||
case cudaErrorInvalidResourceHandle:
|
||||
return "cudaErrorInvalidResourceHandle";
|
||||
|
||||
case cudaErrorNotReady:
|
||||
return "cudaErrorNotReady";
|
||||
|
||||
case cudaErrorInsufficientDriver:
|
||||
return "cudaErrorInsufficientDriver";
|
||||
|
||||
case cudaErrorSetOnActiveProcess:
|
||||
return "cudaErrorSetOnActiveProcess";
|
||||
|
||||
case cudaErrorInvalidSurface:
|
||||
return "cudaErrorInvalidSurface";
|
||||
|
||||
case cudaErrorNoDevice:
|
||||
return "cudaErrorNoDevice";
|
||||
|
||||
case cudaErrorECCUncorrectable:
|
||||
return "cudaErrorECCUncorrectable";
|
||||
|
||||
case cudaErrorSharedObjectSymbolNotFound:
|
||||
return "cudaErrorSharedObjectSymbolNotFound";
|
||||
|
||||
case cudaErrorSharedObjectInitFailed:
|
||||
return "cudaErrorSharedObjectInitFailed";
|
||||
|
||||
case cudaErrorUnsupportedLimit:
|
||||
return "cudaErrorUnsupportedLimit";
|
||||
|
||||
case cudaErrorDuplicateVariableName:
|
||||
return "cudaErrorDuplicateVariableName";
|
||||
|
||||
case cudaErrorDuplicateTextureName:
|
||||
return "cudaErrorDuplicateTextureName";
|
||||
|
||||
case cudaErrorDuplicateSurfaceName:
|
||||
return "cudaErrorDuplicateSurfaceName";
|
||||
|
||||
case cudaErrorDevicesUnavailable:
|
||||
return "cudaErrorDevicesUnavailable";
|
||||
|
||||
case cudaErrorInvalidKernelImage:
|
||||
return "cudaErrorInvalidKernelImage";
|
||||
|
||||
case cudaErrorNoKernelImageForDevice:
|
||||
return "cudaErrorNoKernelImageForDevice";
|
||||
|
||||
case cudaErrorIncompatibleDriverContext:
|
||||
return "cudaErrorIncompatibleDriverContext";
|
||||
|
||||
case cudaErrorPeerAccessAlreadyEnabled:
|
||||
return "cudaErrorPeerAccessAlreadyEnabled";
|
||||
|
||||
case cudaErrorPeerAccessNotEnabled:
|
||||
return "cudaErrorPeerAccessNotEnabled";
|
||||
|
||||
case cudaErrorDeviceAlreadyInUse:
|
||||
return "cudaErrorDeviceAlreadyInUse";
|
||||
|
||||
case cudaErrorProfilerDisabled:
|
||||
return "cudaErrorProfilerDisabled";
|
||||
|
||||
case cudaErrorProfilerNotInitialized:
|
||||
return "cudaErrorProfilerNotInitialized";
|
||||
|
||||
case cudaErrorProfilerAlreadyStarted:
|
||||
return "cudaErrorProfilerAlreadyStarted";
|
||||
|
||||
case cudaErrorProfilerAlreadyStopped:
|
||||
return "cudaErrorProfilerAlreadyStopped";
|
||||
|
||||
/* Since CUDA 4.0*/
|
||||
case cudaErrorAssert:
|
||||
return "cudaErrorAssert";
|
||||
|
||||
case cudaErrorTooManyPeers:
|
||||
return "cudaErrorTooManyPeers";
|
||||
|
||||
case cudaErrorHostMemoryAlreadyRegistered:
|
||||
return "cudaErrorHostMemoryAlreadyRegistered";
|
||||
|
||||
case cudaErrorHostMemoryNotRegistered:
|
||||
return "cudaErrorHostMemoryNotRegistered";
|
||||
|
||||
/* Since CUDA 5.0 */
|
||||
case cudaErrorOperatingSystem:
|
||||
return "cudaErrorOperatingSystem";
|
||||
|
||||
case cudaErrorPeerAccessUnsupported:
|
||||
return "cudaErrorPeerAccessUnsupported";
|
||||
|
||||
case cudaErrorLaunchMaxDepthExceeded:
|
||||
return "cudaErrorLaunchMaxDepthExceeded";
|
||||
|
||||
case cudaErrorLaunchFileScopedTex:
|
||||
return "cudaErrorLaunchFileScopedTex";
|
||||
|
||||
case cudaErrorLaunchFileScopedSurf:
|
||||
return "cudaErrorLaunchFileScopedSurf";
|
||||
|
||||
case cudaErrorSyncDepthExceeded:
|
||||
return "cudaErrorSyncDepthExceeded";
|
||||
|
||||
case cudaErrorLaunchPendingCountExceeded:
|
||||
return "cudaErrorLaunchPendingCountExceeded";
|
||||
|
||||
case cudaErrorNotPermitted:
|
||||
return "cudaErrorNotPermitted";
|
||||
|
||||
case cudaErrorNotSupported:
|
||||
return "cudaErrorNotSupported";
|
||||
|
||||
/* Since CUDA 6.0 */
|
||||
case cudaErrorHardwareStackError:
|
||||
return "cudaErrorHardwareStackError";
|
||||
|
||||
case cudaErrorIllegalInstruction:
|
||||
return "cudaErrorIllegalInstruction";
|
||||
|
||||
case cudaErrorMisalignedAddress:
|
||||
return "cudaErrorMisalignedAddress";
|
||||
|
||||
case cudaErrorInvalidAddressSpace:
|
||||
return "cudaErrorInvalidAddressSpace";
|
||||
|
||||
case cudaErrorInvalidPc:
|
||||
return "cudaErrorInvalidPc";
|
||||
|
||||
case cudaErrorIllegalAddress:
|
||||
return "cudaErrorIllegalAddress";
|
||||
|
||||
/* Since CUDA 6.5*/
|
||||
case cudaErrorInvalidPtx:
|
||||
return "cudaErrorInvalidPtx";
|
||||
|
||||
case cudaErrorInvalidGraphicsContext:
|
||||
return "cudaErrorInvalidGraphicsContext";
|
||||
|
||||
case cudaErrorStartupFailure:
|
||||
return "cudaErrorStartupFailure";
|
||||
|
||||
case cudaErrorApiFailureBase:
|
||||
return "cudaErrorApiFailureBase";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
template< typename T >
|
||||
void check(T result, char const *const func, const char *const file, int const line)
|
||||
{
|
||||
if (result)
|
||||
{
|
||||
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
|
||||
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
|
||||
cudaDeviceReset();
|
||||
// Make sure we call CUDA Device Reset before exiting
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
#if CUDART_VERSION < 5000
|
||||
|
||||
// CUDA-C includes
|
||||
#include <cuda.h>
|
||||
|
||||
// This function wraps the CUDA Driver API into a template function
|
||||
template <class T>
|
||||
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
|
||||
{
|
||||
CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
|
||||
|
||||
if (CUDA_SUCCESS != error) {
|
||||
fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
|
||||
error, __FILE__, __LINE__);
|
||||
|
||||
// cudaDeviceReset causes the driver to clean up all state. While
|
||||
// not mandatory in normal operation, it is good practice. It is also
|
||||
// needed to ensure correct operation when the application is being
|
||||
// profiled. Calling cudaDeviceReset causes all profile data to be
|
||||
// flushed before the application exits
|
||||
cudaDeviceReset();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* CUDART_VERSION < 5000 */
|
||||
|
||||
// Beginning of GPU Architecture definitions
|
||||
inline int ConvertSMVer2Cores(int major, int minor)
|
||||
{
|
||||
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
|
||||
typedef struct {
|
||||
int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
|
||||
int Cores;
|
||||
} sSMtoCores;
|
||||
|
||||
sSMtoCores nGpuArchCoresPerSM[] = {
|
||||
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
|
||||
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
|
||||
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
|
||||
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
|
||||
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
|
||||
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
|
||||
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
|
||||
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
|
||||
{ -1, -1 }
|
||||
};
|
||||
|
||||
int index = 0;
|
||||
|
||||
while (nGpuArchCoresPerSM[index].SM != -1) {
|
||||
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
|
||||
return nGpuArchCoresPerSM[index].Cores;
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
// If we don't find the values, we default to the previous one to run properly
|
||||
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
|
||||
return nGpuArchCoresPerSM[index-1].Cores;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
|
||||
|
||||
int deviceCount = 0;
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
printf("cudaGetDeviceCount failed: %s (%d)\n",
|
||||
cudaGetErrorString(error_id), (int) error_id);
|
||||
printf("Result = FAIL\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// This function call returns 0 if there are no CUDA capable devices.
|
||||
if (deviceCount == 0)
|
||||
printf("There are no available device(s) that support CUDA\n");
|
||||
else
|
||||
printf("Detected %d CUDA Capable device(s)\n", deviceCount);
|
||||
|
||||
int dev, driverVersion = 0, runtimeVersion = 0;
|
||||
|
||||
for (dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaSetDevice(dev);
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
|
||||
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
|
||||
|
||||
// Console log
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
|
||||
|
||||
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
|
||||
(float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
|
||||
|
||||
printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
|
||||
deviceProp.multiProcessorCount,
|
||||
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
|
||||
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
|
||||
printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
|
||||
|
||||
|
||||
#if CUDART_VERSION >= 5000
|
||||
// This is supported in CUDA 5.0 (runtime API device properties)
|
||||
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
|
||||
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
|
||||
|
||||
if (deviceProp.l2CacheSize) {
|
||||
printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
|
||||
}
|
||||
|
||||
#else
|
||||
// This is only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
|
||||
int memoryClock;
|
||||
getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
|
||||
printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
|
||||
int memBusWidth;
|
||||
getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
|
||||
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
|
||||
int L2CacheSize;
|
||||
getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
|
||||
|
||||
if (L2CacheSize) {
|
||||
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
|
||||
deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
|
||||
deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
|
||||
printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
|
||||
deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
|
||||
printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
|
||||
deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
|
||||
|
||||
|
||||
printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
|
||||
printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
|
||||
printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
|
||||
printf(" Warp size: %d\n", deviceProp.warpSize);
|
||||
printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
|
||||
printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
|
||||
printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
|
||||
deviceProp.maxThreadsDim[0],
|
||||
deviceProp.maxThreadsDim[1],
|
||||
deviceProp.maxThreadsDim[2]);
|
||||
printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
|
||||
deviceProp.maxGridSize[0],
|
||||
deviceProp.maxGridSize[1],
|
||||
deviceProp.maxGridSize[2]);
|
||||
printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
|
||||
printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
|
||||
printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
|
||||
printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
|
||||
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
|
||||
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
|
||||
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
|
||||
printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
|
||||
#endif
|
||||
printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
|
||||
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
|
||||
|
||||
const char *sComputeMode[] = {
|
||||
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
|
||||
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
|
||||
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
|
||||
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
|
||||
"Unknown",
|
||||
NULL
|
||||
};
|
||||
printf(" Compute Mode:\n");
|
||||
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
|
||||
}
|
||||
|
||||
// If there are 2 or more GPUs, query to determine whether RDMA is supported
|
||||
if (deviceCount >= 2)
|
||||
{
|
||||
cudaDeviceProp prop[64];
|
||||
int gpuid[64]; // we want to find the first two GPU's that can support P2P
|
||||
int gpu_p2p_count = 0;
|
||||
|
||||
for (int i=0; i < deviceCount; i++)
|
||||
{
|
||||
checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
|
||||
|
||||
// Only boards based on Fermi or later can support P2P
|
||||
if ((prop[i].major >= 2)
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
// on Windows (64-bit), the Tesla Compute Cluster driver for Windows must be enabled to support this
|
||||
&& prop[i].tccDriver
|
||||
#endif
|
||||
)
|
||||
{
|
||||
// This is an array of P2P capable GPUs
|
||||
gpuid[gpu_p2p_count++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Show all the combinations of support P2P GPUs
|
||||
int can_access_peer_0_1, can_access_peer_1_0;
|
||||
|
||||
if (gpu_p2p_count >= 2)
|
||||
{
|
||||
for (int i = 0; i < gpu_p2p_count-1; i++)
|
||||
{
|
||||
for (int j = 1; j < gpu_p2p_count; j++)
|
||||
{
|
||||
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
|
||||
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
|
||||
prop[gpuid[j]].name, gpuid[j] ,
|
||||
can_access_peer_0_1 ? "Yes" : "No");
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 1; j < gpu_p2p_count; j++)
|
||||
{
|
||||
for (int i = 0; i < gpu_p2p_count-1; i++)
|
||||
{
|
||||
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
|
||||
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
|
||||
prop[gpuid[i]].name, gpuid[i] ,
|
||||
can_access_peer_1_0 ? "Yes" : "No");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// csv masterlog info
|
||||
// *****************************
|
||||
// exe and CUDA driver name
|
||||
printf("\n");
|
||||
std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
|
||||
char cTemp[128];
|
||||
|
||||
// driver version
|
||||
sProfileString += ", CUDA Driver Version = ";
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
|
||||
#else
|
||||
sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
|
||||
#endif
|
||||
sProfileString += cTemp;
|
||||
|
||||
// Runtime version
|
||||
sProfileString += ", CUDA Runtime Version = ";
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
#else
|
||||
sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
#endif
|
||||
sProfileString += cTemp;
|
||||
|
||||
// Device count
|
||||
sProfileString += ", NumDevs = ";
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
sprintf_s(cTemp, 10, "%d", deviceCount);
|
||||
#else
|
||||
sprintf(cTemp, "%d", deviceCount);
|
||||
#endif
|
||||
sProfileString += cTemp;
|
||||
|
||||
// Print Out all device Names
|
||||
for (dev = 0; dev < deviceCount; ++dev)
|
||||
{
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
sprintf_s(cTemp, 13, ", Device%d = ", dev);
|
||||
#else
|
||||
sprintf(cTemp, ", Device%d = ", dev);
|
||||
#endif
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
sProfileString += cTemp;
|
||||
sProfileString += deviceProp.name;
|
||||
}
|
||||
|
||||
sProfileString += "\n";
|
||||
printf("%s", sProfileString.c_str());
|
||||
|
||||
printf("Result = PASS\n");
|
||||
|
||||
// finish
|
||||
// cudaDeviceReset causes the driver to clean up all state. While
|
||||
// not mandatory in normal operation, it is good practice. It is also
|
||||
// needed to ensure correct operation when the application is being
|
||||
// profiled. Calling cudaDeviceReset causes all profile data to be
|
||||
// flushed before the application exits
|
||||
cudaDeviceReset();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
{
stdenv
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
, strace
}:

stdenv.mkDerivation (finalAttrs: {
name = "cudainfo";
src = ./.;
buildInputs = [
cudatoolkit # Required for nvcc
cudaPackages.cuda_cudart.static # Required for -lcudart_static
autoAddDriverRunpath
];
installPhase = ''
mkdir -p $out/bin
cp -a cudainfo $out/bin
'';
passthru.gpuCheck = stdenv.mkDerivation {
name = "cudainfo-test";
requiredSystemFeatures = [ "cuda" ];
dontBuild = true;
nativeCheckInputs = [
finalAttrs.finalPackage # The cudainfo package from above
strace # When it fails, it will show the trace
];
dontUnpack = true;
doCheck = true;
checkPhase = ''
if ! cudainfo; then
set -x
cudainfo=$(command -v cudainfo)
ldd $cudainfo
readelf -d $cudainfo
strace -f $cudainfo
set +x
fi
'';
installPhase = "touch $out";
};
})
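Because of requiredSystemFeatures, the gpuCheck derivation is only scheduled on a machine whose Nix daemon advertises the cuda feature; a minimal sketch of building it on such a machine, assuming the overlay from this repository is applied:

```nix
# Sketch only: evaluate the GPU smoke test; it runs cudainfo at build time
# and is scheduled only on builders that declare the "cuda" system feature.
let
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
  pkgs.cudainfo.gpuCheck
```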
||||
@@ -1,13 +0,0 @@
|
||||
diff --git a/src/merger/common/bfd_manager.c b/src/merger/common/bfd_manager.c
index 5f9dacf9..5231e3eb 100644
--- a/src/merger/common/bfd_manager.c
+++ b/src/merger/common/bfd_manager.c
@@ -225,7 +225,7 @@ asymbol **BFDmanager_getDefaultSymbols (void)
*
* @return No return value.
*/
-static void BFDmanager_findAddressInSection (bfd * abfd, asection * section, PTR data)
+static void BFDmanager_findAddressInSection (bfd * abfd, asection * section, void * data)
{
#if HAVE_BFD_GET_SECTION_SIZE || HAVE_BFD_SECTION_SIZE || HAVE_BFD_GET_SECTION_SIZE_BEFORE_RELOC
bfd_size_type size;
||||
@@ -1,114 +0,0 @@
|
||||
{ stdenv
|
||||
, lib
|
||||
, fetchFromGitHub
|
||||
, boost
|
||||
, libdwarf
|
||||
, libelf
|
||||
, libxml2
|
||||
, libunwind
|
||||
, papi
|
||||
, binutils-unwrapped
|
||||
, libiberty
|
||||
, gfortran
|
||||
, xml2
|
||||
, which
|
||||
, libbfd
|
||||
, mpi ? null
|
||||
, cuda ? null
|
||||
, llvmPackages
|
||||
, autoreconfHook
|
||||
#, python3Packages
|
||||
, installShellFiles
|
||||
, symlinkJoin
|
||||
, enablePapi ? stdenv.hostPlatform == stdenv.buildPlatform # Disabled when cross-compiling
|
||||
}:
|
||||
|
||||
let
|
||||
libdwarfBundle = symlinkJoin {
|
||||
name = "libdwarfBundle";
|
||||
paths = [ libdwarf.dev libdwarf.lib libdwarf.out ];
|
||||
};
|
||||
in
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "extrae";
|
||||
version = "4.0.1";
|
||||
src = fetchFromGitHub {
|
||||
owner = "bsc-performance-tools";
|
||||
repo = "extrae";
|
||||
rev = "${version}";
|
||||
sha256 = "SlMYxNQXJ0Xg90HmpnotUR3tEPVVBXhk1NtEBJwGBR4=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
# FIXME: Waiting for German to merge this patch. Still not in master, merged
|
||||
# on 2023-03-01 in devel branch (after 3 years), see:
|
||||
# https://github.com/bsc-performance-tools/extrae/pull/45
|
||||
./use-command.patch
|
||||
# https://github.com/bsc-performance-tools/extrae/issues/71
|
||||
./PTR.patch
|
||||
];
|
||||
|
||||
enableParallelBuilding = true;
|
||||
hardeningDisable = [ "all" ];
|
||||
|
||||
nativeBuildInputs = [ installShellFiles ];
|
||||
|
||||
buildInputs = [
|
||||
autoreconfHook
|
||||
gfortran
|
||||
libunwind
|
||||
binutils-unwrapped
|
||||
boost
|
||||
boost.dev
|
||||
libiberty
|
||||
mpi
|
||||
xml2
|
||||
which
|
||||
libxml2.dev
|
||||
libbfd
|
||||
#python3Packages.sphinx
|
||||
]
|
||||
++ lib.optional stdenv.cc.isClang llvmPackages.openmp;
|
||||
|
||||
preConfigure = ''
|
||||
configureFlagsArray=(
|
||||
--enable-posix-clock
|
||||
--with-binutils="${binutils-unwrapped} ${libiberty}"
|
||||
--with-dwarf=${libdwarfBundle}
|
||||
--with-elf=${libelf}
|
||||
--with-boost=${boost.dev}
|
||||
--enable-instrument-io
|
||||
--enable-instrument-dynamic-memory
|
||||
--without-memkind
|
||||
--enable-merge-in-trace
|
||||
--disable-online
|
||||
--without-opencl
|
||||
--enable-pebs-sampling
|
||||
--enable-sampling
|
||||
--with-unwind=${libunwind.dev}
|
||||
--with-xml-prefix=${libxml2.dev}
|
||||
${lib.optionalString enablePapi "--with-papi=${papi}"}
|
||||
${if (mpi != null) then ''--with-mpi=${mpi}''
|
||||
else ''--without-mpi''}
|
||||
--without-dyninst)
|
||||
'';
|
||||
|
||||
# Install the manuals only by hand, as we don't want to pull the complete
|
||||
# LaTeX world
|
||||
|
||||
# FIXME: sphinx is broken
|
||||
#postBuild = ''
|
||||
# make -C docs man
|
||||
#'';
|
||||
#
|
||||
#postInstall = ''
|
||||
# installManPage docs/builds/man/*/*
|
||||
#'';
|
||||
|
||||
# ++ (
|
||||
# if (openmp)
|
||||
# then [ "--enable-openmp" ]
|
||||
# else []
|
||||
# );
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
diff --git a/substitute b/substitute
|
||||
index d5615606..82ca91a5 100755
|
||||
--- a/substitute
|
||||
+++ b/substitute
|
||||
@@ -16,7 +16,7 @@ UNAME=`uname`
|
||||
if [ "${UNAME}" = "Darwin" -o "${UNAME}" = "AIX" ] ; then
|
||||
TMPFILE=substitute-$$
|
||||
${SED} "s|${KEY}|${VALUE}|g" < ${FILE} >${TMPFILE}
|
||||
- /bin/mv -f ${TMPFILE} ${FILE}
|
||||
+ command mv -f ${TMPFILE} ${FILE}
|
||||
else
|
||||
${SED} "s|${KEY}|${VALUE}|g" -i ${FILE}
|
||||
fi
|
||||
diff --git a/substitute-all b/substitute-all
|
||||
index 48c6b76a..eda7a0f2 100755
|
||||
--- a/substitute-all
|
||||
+++ b/substitute-all
|
||||
@@ -23,5 +23,5 @@ fi
|
||||
|
||||
echo "Applying modification in ${PATHTOCHANGE} - Key = ${KEY} for value = ${VALUE}"
|
||||
|
||||
-/usr/bin/find ${PATHTOCHANGE} -type f -exec ${SCRIPT_LOCATION} "${SED}" "${KEY}" "${VALUE}" {} \;
|
||||
+command find ${PATHTOCHANGE} -type f -exec ${SCRIPT_LOCATION} "${SED}" "${KEY}" "${VALUE}" {} \;
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
{
|
||||
stdenv
|
||||
, fetchurl
|
||||
, symlinkJoin
|
||||
, slurm
|
||||
, rdma-core
|
||||
, autoconf
|
||||
, automake
|
||||
, libtool
|
||||
, mpi
|
||||
, rsync
|
||||
, gfortran
|
||||
}:
|
||||
|
||||
let
|
||||
rdma-core-all = symlinkJoin {
|
||||
name = "rdma-core-all";
|
||||
paths = [ rdma-core.dev rdma-core.out ];
|
||||
};
|
||||
mpiAll = symlinkJoin {
|
||||
name = "mpi-all";
|
||||
paths = [ mpi.all ];
|
||||
};
|
||||
in
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "GPI-2";
|
||||
version = "tagaspi-2021.11";
|
||||
|
||||
src = fetchurl {
|
||||
url = "https://pm.bsc.es/gitlab/interoperability/extern/GPI-2/-/archive/${version}/GPI-2-${version}.tar.gz";
|
||||
hash = "sha256-eY2wpyTpnOXRoAcYoAP82Jq9Q7p5WwDpMj+f1vEX5zw=";
|
||||
};
|
||||
|
||||
enableParallelBuilding = true;
|
||||
|
||||
patches = [ ./rdma-core.patch ./max-mem.patch ];
|
||||
|
||||
preConfigure = ''
|
||||
patchShebangs autogen.sh
|
||||
./autogen.sh
|
||||
'';
|
||||
|
||||
configureFlags = [
|
||||
"--with-infiniband=${rdma-core-all}"
|
||||
"--with-mpi=${mpiAll}"
|
||||
"--with-slurm"
|
||||
"CFLAGS=-fPIC"
|
||||
"CXXFLAGS=-fPIC"
|
||||
];
|
||||
|
||||
buildInputs = [ slurm mpiAll rdma-core-all autoconf automake libtool rsync gfortran ];
|
||||
|
||||
hardeningDisable = [ "all" ];
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
--- a/tests/tests/segments/max_mem.c 2025-09-12 13:30:53.449353591 +0200
+++ b/tests/tests/segments/max_mem.c 2025-09-12 13:33:49.750352401 +0200
@@ -1,5 +1,7 @@
#include <test_utils.h>

+gaspi_size_t gaspi_get_system_mem (void);
+
/* Test allocates 45% of system memory and creates a segment that
large or if several ranks per node exist, divided among that
number */
||||
@@ -1,12 +0,0 @@
--- a/src/devices/ib/GPI2_IB.h 2025-09-12 13:25:31.564181121 +0200
+++ b/src/devices/ib/GPI2_IB.h 2025-09-12 13:24:49.105422150 +0200
@@ -26,6 +26,9 @@ along with GPI-2. If not, see <http://ww

#include "GPI2_Dev.h"

+/* Missing prototype as driver.h is now private */
+int ibv_read_sysfs_file(const char *dir, const char *file, char *buf, size_t size);
+
#define GASPI_GID_INDEX (0)
#define PORT_LINK_UP (5)
#define MAX_INLINE_BYTES (128)
@@ -1,492 +0,0 @@
{ stdenv
, fetchurl
, lib
, dpkg
, rsync
, libffi_3_3
, libelf
, libxml2
, hwloc
, zlib
, autoPatchelfHook
, libfabric
, gcc13
, wrapCCWith
}:

# The distribution of Intel packages is a mess. We do the installation based
# on the .deb metapackage "intel-hpckit" and follow its dependencies,
# which have mismatching versions.

# Bruno Bzeznik (bzizou) went through the madness of using their .sh installer,
# pulling all the X dependencies here:
# https://github.com/Gricad/nur-packages/blob/4b67c8ad0ce1baa1d2f53ba41ae5bca8e00a9a63/pkgs/intel/oneapi.nix

# But this is an attempt to install the packages from the APT repo instead.

let

  gcc = gcc13;

  v = {
    hpckit = "2023.1.0";
    compiler = "2023.1.0";
    tbb = "2021.9.0";
    mpi = "2021.9.0";
  };

  aptPackageIndex = stdenv.mkDerivation {
    name = "intel-oneapi-packages";
    srcs = [
      # Run update.sh to update the package lists
      ./amd64-packages ./all-packages
    ];
    phases = [ "installPhase" ];
    installPhase = ''
      awk -F': ' '\
        BEGIN { print "[ {" } \
        NR>1 && /^Package: / { print "} {"; } \
        /: / { printf "%s = \"%s\";\n", $1, $2 } \
        END { print "} ]" }' $srcs > $out
    '';
  };

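  # A rough sketch of what the generated index evaluates to when imported: a
  # list of attribute sets, one per APT record, keyed by the record fields.
  # The package names, version, path and hashes below are placeholders for
  # illustration only, not values taken from the real index:
  #
  #   [ {
  #     Package = "intel-oneapi-example";
  #     Version = "0.0.0-00000";
  #     Filename = "pool/main/intel-oneapi-example_0.0.0-00000_amd64.deb";
  #     SHA256 = "0000000000000000000000000000000000000000000000000000000000000000";
  #   } {
  #     Package = "intel-oneapi-other-example";
  #     SHA256 = "1111111111111111111111111111111111111111111111111111111111111111";
  #   } ]
  #
  # getSum and getUrl below filter this list on the Package field and read the
  # SHA256 and Filename attributes of the first match.
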
  aptPackages = import aptPackageIndex;

  apthost = "https://apt.repos.intel.com/oneapi/";

  getSum = pkgList: name:
    let
      matches = lib.filter (x: name == x.Package) pkgList;
      #n = lib.length matches;
      #match = builtins.trace (name + " -- ${builtins.toString n}") (lib.elemAt matches 0);
      match = lib.elemAt matches 0;
    in
      match.SHA256;

  getUrl = pkgList: name:
    let
      matches = lib.filter (x: name == x.Package) pkgList;
      #match = assert lib.length matches == 1; lib.elemAt matches 0;
      n = lib.length matches;
      match =
        #builtins.trace (name + " -- n=${builtins.toString n}")
        (lib.elemAt matches 0);
    in
      apthost + match.Filename;

  uncompressDebs = debs: name: stdenv.mkDerivation {
    name = name;
    srcs = debs;
    nativeBuildInputs = [ dpkg ];
    phases = [ "installPhase" ];
    installPhase = ''
      mkdir -p $out
      for src in $srcs; do
        echo "unpacking $src"
        dpkg -x $src $out
      done
    '';
  };

  joinDebs = name: names:
    let
      urls = builtins.map (x: getUrl aptPackages x) names;
      sums = builtins.map (x: getSum aptPackages x) names;
      getsrc = url: sha256: builtins.fetchurl { inherit url sha256; };
      debs = lib.zipListsWith getsrc urls sums;
    in
      uncompressDebs debs "${name}-source";


  intel-mpi = stdenv.mkDerivation rec {
    version = v.mpi;
    pname = "intel-mpi";

    src = joinDebs pname [
      "intel-oneapi-mpi-devel-${version}"
      "intel-oneapi-mpi-${version}"
    ];

    nativeBuildInputs = [
      autoPatchelfHook
      rsync
    ];

    buildInputs = [
      libfabric
      zlib
      stdenv.cc.cc.lib
    ];

    phases = [ "installPhase" "fixupPhase" ];
    dontStrip = true;
    installPhase = ''
      mkdir -p $out/{bin,etc,lib,include}
      mkdir -p $out/share/man

      cd $src

      # MPI
      pushd opt/intel/oneapi/mpi/${version}
      rsync -a man/ $out/share/man/
      rsync -a etc/ $out/etc/
      rsync -a include/ $out/include/
      cp -a lib/lib* $out/lib/
      # Copy the actual libmpi.so from release
      cp -a lib/release/lib* $out/lib
      # Broken due to missing libze_loader.so.1
      rsync -a --exclude IMB-MPI1-GPU bin/ $out/bin/
      popd
    '';
    preFixup = ''
      for i in $out/bin/mpi* ; do
        echo "Fixing paths in $i"
        sed -i "s:I_MPI_SUBSTITUTE_INSTALLDIR:$out:g" "$i"
      done
    '';
  };

  intel-tbb = stdenv.mkDerivation rec {
    version = v.tbb;
    pname = "intel-tbb";
    src = joinDebs pname [
      "intel-oneapi-tbb-${version}"
      "intel-oneapi-tbb-common-${version}"
    ];

    buildInputs = [
      intel-mpi
      libffi_3_3
      libelf
      libxml2
      hwloc
      stdenv.cc.cc.lib
    ];

    nativeBuildInputs = [
      autoPatchelfHook
      rsync
    ];
    phases = [ "installPhase" "fixupPhase" ];
    dontStrip = true;

    autoPatchelfIgnoreMissingDeps = [ "libhwloc.so.5" ];

    installPhase = ''
      mkdir -p $out/lib

      cd $src

      pushd opt/intel/oneapi/tbb/${version}
      # Libraries
      rsync -a lib/intel64/gcc4.8/ $out/lib/
      popd
    '';
  };

  intel-compiler-shared = stdenv.mkDerivation rec {
    version = v.compiler;
    pname = "intel-compiler-shared";
    src = joinDebs pname [
      "intel-oneapi-compiler-shared-${version}"
      "intel-oneapi-compiler-shared-common-${version}"
      "intel-oneapi-compiler-shared-runtime-${version}"
    ];

    buildInputs = [
      intel-mpi
      intel-tbb
      libffi_3_3
      libelf
      libxml2
      zlib
      hwloc
      stdenv.cc.cc.lib
    ];

    nativeBuildInputs = [
      autoPatchelfHook
      rsync
    ];
    phases = [ "installPhase" "fixupPhase" ];
    dontStrip = true;

    autoPatchelfIgnoreMissingDeps = [ "libsycl.so.6" ];

    installPhase = ''
      mkdir -p $out/{bin,lib,include}
      mkdir -p $out/share/man

      cd $src

      # Compiler
      pushd opt/intel/oneapi/compiler/${version}
      pushd linux
      # Binaries
      rsync -a bin/ $out/bin/
      rsync -a --exclude libcilkrts.so.5 bin/intel64/ $out/bin/

      # Libraries
      rsync -a lib/ $out/lib/
      rsync -a lib/x64/ $out/lib/
      rsync -a compiler/lib/intel64_lin/ $out/lib/
      chmod +w $out/lib
      cp bin/intel64/libcilkrts.so.5 $out/lib/
      ln -s $out/lib/libcilkrts.so.5 $out/lib/libcilkrts.so

      # Headers
      rsync -a compiler/include/ $out/include/
      popd
      popd
    '';
  };


  intel-compiler-fortran = stdenv.mkDerivation rec {
    version = v.compiler;
    pname = "intel-fortran";
    src = joinDebs pname [
      "intel-oneapi-compiler-fortran-${version}"
      "intel-oneapi-compiler-fortran-common-${version}"
      "intel-oneapi-compiler-fortran-runtime-${version}"

      "intel-oneapi-compiler-dpcpp-cpp-classic-fortran-shared-runtime-${version}"
      #"intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-${version}"
      #"intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-runtime-${version}"
    ];

    langFortran = true;

    buildInputs = [
      intel-mpi
      intel-compiler-shared
      libffi_3_3
      libelf
      libxml2
      hwloc
      stdenv.cc.cc.lib
    ];

    nativeBuildInputs = [
      autoPatchelfHook
      rsync
    ];

    phases = [ "installPhase" "fixupPhase" ];

    dontStrip = true;

    installPhase = ''
      mkdir -p $out/{bin,lib,include}
      mkdir -p $out/share/man

      cd $src

      # Compiler
      pushd opt/intel/oneapi/compiler/${version}
      pushd linux
      # Binaries
      rsync -a bin/ $out/bin/
      rsync -a bin/intel64/ $out/bin/

      # Libraries
      rsync -a lib/ $out/lib/
      rsync -a compiler/lib/intel64_lin/ $out/lib/

      # Headers
      rsync -a compiler/include/ $out/include/
      popd

      # Manuals
      rsync -a documentation/en/man/common/ $out/share/man/

      # Fix lib_lin
      ln -s $out/lib $out/lib_lin
      popd
    '';
  };

  intel-compiler = stdenv.mkDerivation rec {
    version = v.compiler;
    pname = "intel-compiler";
    src = joinDebs pname [
      # C/C++
      "intel-oneapi-dpcpp-cpp-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-common-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-runtime-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-common-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-runtime-${version}"
      "intel-oneapi-compiler-dpcpp-cpp-classic-fortran-shared-runtime-${version}"
    ];
    dontCheckForBrokenSymlinks = true;
    # From https://aur.archlinux.org/packages/intel-oneapi-compiler:
    # - intel-oneapi-compiler-cpp-eclipse-cfg-2023.0.0-25370_all.deb
    # + intel-oneapi-compiler-dpcpp-cpp-2023.0.0-2023.0.0-25370_amd64.deb
    # x intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.0.0-2023.0.0-25370_amd64.deb (empty)
    # + intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.0.0-25370_amd64.deb
    # + intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-common-2023.0.0-2023.0.0-25370_all.deb
    # + intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-runtime-2023.0.0-2023.0.0-25370_amd64.deb
    # + intel-oneapi-compiler-dpcpp-cpp-classic-fortran-shared-runtime-2023.0.0-2023.0.0-25370_amd64.deb
    # + intel-oneapi-compiler-dpcpp-cpp-common-2023.0.0-2023.0.0-25370_all.deb
    # + intel-oneapi-compiler-dpcpp-cpp-runtime-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-compiler-dpcpp-eclipse-cfg-2023.0.0-25370_all.deb
    # - intel-oneapi-compiler-fortran-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-compiler-fortran-common-2023.0.0-2023.0.0-25370_all.deb
    # - intel-oneapi-compiler-fortran-runtime-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-compiler-fortran-runtime-2023.0.0-25370_amd64.deb
    # - intel-oneapi-compiler-shared-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-compiler-shared-common-2023.0.0-2023.0.0-25370_all.deb
    # - intel-oneapi-compiler-shared-runtime-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-dpcpp-cpp-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-openmp-2023.0.0-2023.0.0-25370_amd64.deb
    # - intel-oneapi-openmp-common-2023.0.0-2023.0.0-25370_all.deb

    buildInputs = [
      intel-compiler-shared
      libffi_3_3
      libelf
      libxml2
      hwloc
      stdenv.cc.cc.lib
    ];

    nativeBuildInputs = [
      autoPatchelfHook
      rsync
    ];
    autoPatchelfIgnoreMissingDeps = [ "libtbb.so.12" "libtbbmalloc.so.2" "libze_loader.so.1" ];

    phases = [ "installPhase" "fixupPhase" ];

    dontStrip = true;

    installPhase = ''
      mkdir -p $out/{bin,lib}
      mkdir -p $out/share/man

      cd $src

      # Compiler
      pushd opt/intel/oneapi/compiler/${version}
      pushd linux
      # Binaries
      rsync -a bin/ $out/bin/
      rsync -a bin-llvm/ $out/bin/
      rsync -a bin/intel64/ $out/bin/

      # Libraries
      rsync -a --exclude oclfpga lib/ $out/lib/
      rsync -a compiler/lib/intel64_lin/ $out/lib/

      # Headers
      rsync -a compiler/include/ $out/include/ # Intrinsics for icc
      rsync -a include/ $out/include/
      chmod +w $out/include
      ln -s $out/lib/clang/16.0.0/include/ $out/include/icx # For icx
      popd

      # Manuals
      rsync -a documentation/en/man/common/ $out/share/man/
      popd
    '';
  };

  wrapIntel = { cc, mygcc, extraBuild ? "", extraInstall ? "" }:
    let
      targetConfig = stdenv.targetPlatform.config;
    in (wrapCCWith {
      cc = cc;
      extraBuildCommands = ''
        echo "-isystem ${cc}/include" >> $out/nix-support/cc-cflags
        echo "-isystem ${cc}/include/intel64" >> $out/nix-support/cc-cflags

        echo "-L${mygcc.cc}/lib/gcc/${targetConfig}/${mygcc.version}" >> $out/nix-support/cc-ldflags
        echo "-L${mygcc.cc.lib}/lib" >> $out/nix-support/cc-ldflags
        echo "-L${intel-compiler-shared}/lib" >> $out/nix-support/cc-ldflags
        echo "-L${cc}/lib" >> $out/nix-support/cc-ldflags

        # Need the gcc in the path
        # FIXME: We should find a better way to modify the PATH instead of using
        # this ugly hack. See https://jungle.bsc.es/git/rarias/bscpkgs/issues/9
        echo 'path_backup="${mygcc}/bin:$path_backup"' >> $out/nix-support/cc-wrapper-hook

        # Disable hardening by default
        echo "" > $out/nix-support/add-hardening.sh
      '' + extraBuild;
    }).overrideAttrs (old: {
      installPhase = old.installPhase + extraInstall;
    });

  icx-wrapper = wrapIntel rec {
    cc = intel-compiler;
    mygcc = gcc;
    extraBuild = ''
      wrap icx $wrapper $ccPath/icx
      wrap icpx $wrapper $ccPath/icpx
      echo "-isystem ${cc}/include/icx" >> $out/nix-support/cc-cflags
      echo "--gcc-toolchain=${mygcc.cc}" >> $out/nix-support/cc-cflags
    '';
    extraInstall = ''
      export named_cc="icx"
      export named_cxx="icpx"
    '';
  };

  # Legacy
  icc-wrapper = wrapIntel rec {
    cc = intel-compiler;
    # The Intel icc classic compiler tries to behave like the gcc found in
    # $PATH, even if it doesn't support some of its features. See:
    # https://community.intel.com/t5/Intel-C-Compiler/builtin-shuffle-GCC-compatibility-and-has-builtin/td-p/1143619
    mygcc = gcc;
    extraBuild = ''
      wrap icc $wrapper $ccPath/icc
      wrap icpc $wrapper $ccPath/icpc
      echo "-isystem ${cc}/include/icc" >> $out/nix-support/cc-cflags
    '';
    extraInstall = ''
      export named_cc="icc"
      export named_cxx="icpc"
    '';
  };

  ifort-wrapper = wrapIntel rec {
    cc = intel-compiler-fortran;
    mygcc = gcc;
    extraBuild = ''
      wrap ifort $wrapper $ccPath/ifort
    '';
    extraInstall = ''
      export named_fc="ifort"
    '';
  };

  stdenv-icc = stdenv.override {
    cc = icc-wrapper;
    allowedRequisites = null;
  };

  stdenv-icx = stdenv.override {
    cc = icx-wrapper;
    allowedRequisites = null;
  };

  stdenv-ifort = stdenv.override {
    cc = ifort-wrapper;
    allowedRequisites = null;
  };

in
{
  inherit intel-mpi;
  icx = icx-wrapper;
  icc = icc-wrapper;
  ifort = ifort-wrapper;

  stdenv = stdenv-icx;
  stdenv-icc = stdenv-icc;
  stdenv-ifort = stdenv-ifort;
}
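A minimal sketch of how the attribute set returned by this (now removed) expression could be consumed from another Nix file; the path ./intel-oneapi.nix and the use of pkgs.hello are illustrative placeholders, not names taken from this repository:

  let
    pkgs = import <nixpkgs> { };
    # Hypothetical location of the expression above.
    oneapi = pkgs.callPackage ./intel-oneapi.nix { };
  in
    # Rebuild an existing package with the icx-based stdenv it exposes.
    pkgs.hello.override { stdenv = oneapi.stdenv; }

The same pattern would apply to oneapi.stdenv-icc and oneapi.stdenv-ifort for the classic icc and ifort toolchains.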