Compare commits
3 Commits
master
...
lake2-ipoi
Author | SHA1 | Date | |
---|---|---|---|
06c75eb3d9 | |||
172a7ffc24 | |||
0744a8a641 |
Binary file not shown.
@ -150,27 +150,3 @@ And update grub.
|
|||||||
```
|
```
|
||||||
# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v
|
# nix build .#nixosConfigurations.xeon02.config.system.build.kexecTree -v
|
||||||
```
|
```
|
||||||
|
|
||||||
## Chain NixOS in same disk with other systems
|
|
||||||
|
|
||||||
To install NixOS on a partition alongside another system which controls the GRUB,
|
|
||||||
first disable the grub device, so the GRUB is not installed in the disk by
|
|
||||||
NixOS (only the /boot files will be generated):
|
|
||||||
|
|
||||||
```
|
|
||||||
boot.loader.grub.device = "nodev";
|
|
||||||
```
|
|
||||||
|
|
||||||
Then add the following entry to the old GRUB configuration:
|
|
||||||
|
|
||||||
```
|
|
||||||
menuentry 'NixOS' {
|
|
||||||
insmod chain
|
|
||||||
search --no-floppy --label nixos --set root
|
|
||||||
configfile /boot/grub/grub.cfg
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The partition with NixOS must have the label "nixos" for it to be found. New
|
|
||||||
system configuration entries will be stored in the GRUB configuration managed
|
|
||||||
by NixOS, so there is no need to change the old GRUB settings.
|
|
||||||
|
59
flake.lock
59
flake.lock
@ -6,15 +6,14 @@
|
|||||||
"home-manager": "home-manager",
|
"home-manager": "home-manager",
|
||||||
"nixpkgs": [
|
"nixpkgs": [
|
||||||
"nixpkgs"
|
"nixpkgs"
|
||||||
],
|
]
|
||||||
"systems": "systems"
|
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1720546205,
|
"lastModified": 1690228878,
|
||||||
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
|
"narHash": "sha256-9Xe7JV0krp4RJC9W9W9WutZVlw6BlHTFMiUP/k48LQY=",
|
||||||
"owner": "ryantm",
|
"owner": "ryantm",
|
||||||
"repo": "agenix",
|
"repo": "agenix",
|
||||||
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
|
"rev": "d8c973fd228949736dedf61b7f8cc1ece3236792",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -24,23 +23,18 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"bscpkgs": {
|
"bscpkgs": {
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1713974364,
|
"lastModified": 1690560045,
|
||||||
"narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=",
|
"narHash": "sha256-39ZP+FIzlWoN3c43hReBYpStg4RLYw/z7TdxCQmOvTM=",
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "de89197a4a7b162db7df9d41c9d07759d87c5709",
|
"rev": "b4a20d7c3af854b39682484adfd1c7979319f439",
|
||||||
"revCount": 937,
|
"revCount": 841,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
"url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
"url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"darwin": {
|
"darwin": {
|
||||||
@ -51,11 +45,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1700795494,
|
"lastModified": 1673295039,
|
||||||
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
|
"narHash": "sha256-AsdYgE8/GPwcelGgrntlijMg4t3hLFJFCRF3tL5WVjA=",
|
||||||
"owner": "lnl7",
|
"owner": "lnl7",
|
||||||
"repo": "nix-darwin",
|
"repo": "nix-darwin",
|
||||||
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
|
"rev": "87b9d090ad39b25b2400029c64825fc2a8868943",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -73,11 +67,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1703113217,
|
"lastModified": 1682203081,
|
||||||
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
|
"narHash": "sha256-kRL4ejWDhi0zph/FpebFYhzqlOBrk0Pl3dzGEKSAlEw=",
|
||||||
"owner": "nix-community",
|
"owner": "nix-community",
|
||||||
"repo": "home-manager",
|
"repo": "home-manager",
|
||||||
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
|
"rev": "32d3e39c491e2f91152c84f8ad8b003420eab0a1",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -88,11 +82,11 @@
|
|||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1720957393,
|
"lastModified": 1692447944,
|
||||||
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
|
"narHash": "sha256-fkJGNjEmTPvqBs215EQU4r9ivecV5Qge5cF/QDLVn3U=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
|
"rev": "d680ded26da5cf104dd2735a51e88d2d8f487b4d",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -108,21 +102,6 @@
|
|||||||
"bscpkgs": "bscpkgs",
|
"bscpkgs": "bscpkgs",
|
||||||
"nixpkgs": "nixpkgs"
|
"nixpkgs": "nixpkgs"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"systems": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1681028828,
|
|
||||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
23
flake.nix
23
flake.nix
@ -3,8 +3,7 @@
|
|||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
agenix.url = "github:ryantm/agenix";
|
agenix.url = "github:ryantm/agenix";
|
||||||
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
||||||
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git";
|
||||||
bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
|
outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
|
||||||
@ -17,19 +16,15 @@ let
|
|||||||
in
|
in
|
||||||
{
|
{
|
||||||
nixosConfigurations = {
|
nixosConfigurations = {
|
||||||
hut = mkConf "hut";
|
hut = mkConf "hut";
|
||||||
owl1 = mkConf "owl1";
|
owl1 = mkConf "owl1";
|
||||||
owl2 = mkConf "owl2";
|
owl2 = mkConf "owl2";
|
||||||
eudy = mkConf "eudy";
|
eudy = mkConf "eudy";
|
||||||
koro = mkConf "koro";
|
koro = mkConf "koro";
|
||||||
bay = mkConf "bay";
|
bay = mkConf "bay";
|
||||||
lake2 = mkConf "lake2";
|
lake2 = mkConf "lake2";
|
||||||
raccoon = mkConf "raccoon";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
|
packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs;
|
||||||
bscpkgs = bscpkgs.packages.x86_64-linux;
|
|
||||||
nixpkgs = nixpkgs.legacyPackages.x86_64-linux;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
29
keys.nix
29
keys.nix
@ -1,29 +0,0 @@
|
|||||||
# As agenix needs to parse the secrets from a standalone .nix file, we describe
|
|
||||||
# here all the public keys
|
|
||||||
rec {
|
|
||||||
hosts = {
|
|
||||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
|
||||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
|
||||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
|
||||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
|
||||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
|
||||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
|
||||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
|
||||||
};
|
|
||||||
|
|
||||||
hostGroup = with hosts; rec {
|
|
||||||
compute = [ owl1 owl2 ];
|
|
||||||
playground = [ eudy koro ];
|
|
||||||
storage = [ bay lake2 ];
|
|
||||||
monitor = [ hut ];
|
|
||||||
|
|
||||||
system = storage ++ monitor;
|
|
||||||
safe = system ++ compute;
|
|
||||||
all = safe ++ playground;
|
|
||||||
};
|
|
||||||
|
|
||||||
admins = {
|
|
||||||
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
|
||||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
|
||||||
};
|
|
||||||
}
|
|
@ -2,21 +2,21 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/main.nix
|
||||||
../module/monitoring.nix
|
../common/monitoring.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select the disk using the ID to avoid mismatches
|
# Select the disk using the ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53562d";
|
||||||
|
|
||||||
boot.kernel.sysctl = {
|
|
||||||
"kernel.yama.ptrace_scope" = lib.mkForce "1";
|
|
||||||
};
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
ceph
|
ceph
|
||||||
];
|
];
|
||||||
|
|
||||||
|
services.slurm = {
|
||||||
|
client.enable = lib.mkForce false;
|
||||||
|
};
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
hostName = "bay";
|
hostName = "bay";
|
||||||
interfaces.eno1.ipv4.addresses = [ {
|
interfaces.eno1.ipv4.addresses = [ {
|
||||||
@ -27,25 +27,16 @@
|
|||||||
address = "10.0.42.40";
|
address = "10.0.42.40";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
firewall = {
|
|
||||||
extraCommands = ''
|
|
||||||
# Accept all incoming TCP traffic from lake2
|
|
||||||
iptables -A nixos-fw -p tcp -s lake2 -j nixos-fw-accept
|
|
||||||
# Accept monitoring requests from hut
|
|
||||||
iptables -A nixos-fw -p tcp -s hut -m multiport --dport 9283,9002 -j nixos-fw-accept
|
|
||||||
# Accept all Ceph traffic from the local network
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
services.ceph = {
|
services.ceph = {
|
||||||
enable = true;
|
enable = true;
|
||||||
global = {
|
global = {
|
||||||
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
||||||
monHost = "10.0.40.40";
|
monHost = "10.0.42.40";
|
||||||
monInitialMembers = "bay";
|
monInitialMembers = "10.0.42.40";
|
||||||
clusterNetwork = "10.0.40.40/24"; # Use Ethernet only
|
publicNetwork = "10.0.42.40/24";
|
||||||
|
clusterNetwork = "10.0.42.40/24";
|
||||||
};
|
};
|
||||||
extraConfig = {
|
extraConfig = {
|
||||||
# Only log to stderr so it appears in the journal
|
# Only log to stderr so it appears in the journal
|
||||||
@ -59,7 +50,7 @@
|
|||||||
enable = true;
|
enable = true;
|
||||||
daemons = [ "mds0" "mds1" ];
|
daemons = [ "mds0" "mds1" ];
|
||||||
extraConfig = {
|
extraConfig = {
|
||||||
"host" = "bay";
|
"host" = "10.0.42.40";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
mgr = {
|
mgr = {
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
{
|
|
||||||
# All machines should include this profile.
|
|
||||||
# Includes the basic configuration for an Intel server.
|
|
||||||
imports = [
|
|
||||||
./base/agenix.nix
|
|
||||||
./base/august-shutdown.nix
|
|
||||||
./base/boot.nix
|
|
||||||
./base/env.nix
|
|
||||||
./base/fs.nix
|
|
||||||
./base/hw.nix
|
|
||||||
./base/net.nix
|
|
||||||
./base/nix.nix
|
|
||||||
./base/ntp.nix
|
|
||||||
./base/rev.nix
|
|
||||||
./base/ssh.nix
|
|
||||||
./base/users.nix
|
|
||||||
./base/watchdog.nix
|
|
||||||
./base/zsh.nix
|
|
||||||
];
|
|
||||||
}
|
|
@ -1,9 +0,0 @@
|
|||||||
{ agenix, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ agenix.nixosModules.default ];
|
|
||||||
|
|
||||||
environment.systemPackages = [
|
|
||||||
agenix.packages.x86_64-linux.default
|
|
||||||
];
|
|
||||||
}
|
|
@ -1,14 +0,0 @@
|
|||||||
{
|
|
||||||
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
|
|
||||||
# hardware from spurious electrical peaks on the yearly electrical cut for
|
|
||||||
# maintenance that starts on August 4th.
|
|
||||||
systemd.timers.august-shutdown = {
|
|
||||||
description = "Shutdown on August 2nd for maintenance";
|
|
||||||
wantedBy = [ "timers.target" ];
|
|
||||||
timerConfig = {
|
|
||||||
OnCalendar = "*-08-02 11:00:00";
|
|
||||||
RandomizedDelaySec = "10min";
|
|
||||||
Unit = "systemd-poweroff.service";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,35 +0,0 @@
|
|||||||
{ pkgs, config, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
|
||||||
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
|
||||||
ncdu config.boot.kernelPackages.perf ldns
|
|
||||||
# From bscpkgs overlay
|
|
||||||
osumb
|
|
||||||
];
|
|
||||||
|
|
||||||
programs.direnv.enable = true;
|
|
||||||
|
|
||||||
# Increase limits
|
|
||||||
security.pam.loginLimits = [
|
|
||||||
{
|
|
||||||
domain = "*";
|
|
||||||
type = "-";
|
|
||||||
item = "memlock";
|
|
||||||
value = "1048576"; # 1 GiB of mem locked
|
|
||||||
}
|
|
||||||
];
|
|
||||||
|
|
||||||
environment.variables = {
|
|
||||||
EDITOR = "vim";
|
|
||||||
VISUAL = "vim";
|
|
||||||
};
|
|
||||||
|
|
||||||
programs.bash.promptInit = ''
|
|
||||||
PS1="\h\\$ "
|
|
||||||
'';
|
|
||||||
|
|
||||||
time.timeZone = "Europe/Madrid";
|
|
||||||
i18n.defaultLocale = "en_DK.UTF-8";
|
|
||||||
}
|
|
@ -1,19 +0,0 @@
|
|||||||
{ pkgs, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
networking = {
|
|
||||||
enableIPv6 = false;
|
|
||||||
useDHCP = false;
|
|
||||||
|
|
||||||
firewall = {
|
|
||||||
enable = true;
|
|
||||||
allowedTCPPorts = [ 22 ];
|
|
||||||
};
|
|
||||||
|
|
||||||
hosts = {
|
|
||||||
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
|
||||||
"84.88.51.152" = [ "raccoon" ];
|
|
||||||
"84.88.51.142" = [ "raccoon-ipmi" ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,57 +0,0 @@
|
|||||||
{ pkgs, nixpkgs, bscpkgs, theFlake, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
nixpkgs.overlays = [
|
|
||||||
bscpkgs.bscOverlay
|
|
||||||
(import ../../../pkgs/overlay.nix)
|
|
||||||
];
|
|
||||||
|
|
||||||
nix = {
|
|
||||||
nixPath = [
|
|
||||||
"nixpkgs=${nixpkgs}"
|
|
||||||
"jungle=${theFlake.outPath}"
|
|
||||||
];
|
|
||||||
|
|
||||||
registry = {
|
|
||||||
nixpkgs.flake = nixpkgs;
|
|
||||||
jungle.flake = theFlake;
|
|
||||||
};
|
|
||||||
|
|
||||||
settings = {
|
|
||||||
experimental-features = [ "nix-command" "flakes" ];
|
|
||||||
sandbox = "relaxed";
|
|
||||||
trusted-users = [ "@wheel" ];
|
|
||||||
flake-registry = pkgs.writeText "global-registry.json"
|
|
||||||
''{"flakes":[],"version":2}'';
|
|
||||||
};
|
|
||||||
|
|
||||||
gc = {
|
|
||||||
automatic = true;
|
|
||||||
dates = "weekly";
|
|
||||||
options = "--delete-older-than 30d";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# The nix-gc.service can begin its execution *before* /home is mounted,
|
|
||||||
# causing it to remove all gcroots considering them as stale, as it cannot
|
|
||||||
# access the symlink. To prevent this problem, we force the service to wait
|
|
||||||
# until /home is mounted as well as other remote FS like /ceph.
|
|
||||||
systemd.services.nix-gc = {
|
|
||||||
# Start remote-fs.target if not already being started and fail if it fails
|
|
||||||
# to start. It will also be stopped if the remote-fs.target fails after
|
|
||||||
# starting successfully.
|
|
||||||
bindsTo = [ "remote-fs.target" ];
|
|
||||||
# Wait until remote-fs.target fully starts before starting this one.
|
|
||||||
after = [ "remote-fs.target"];
|
|
||||||
# Ensure we can access a remote path inside /home
|
|
||||||
unitConfig.ConditionPathExists = "/home/Computational";
|
|
||||||
};
|
|
||||||
|
|
||||||
# This value determines the NixOS release from which the default
|
|
||||||
# settings for stateful data, like file locations and database versions
|
|
||||||
# on your system were taken. It's perfectly fine and recommended to leave
|
|
||||||
# this value at the release version of the first install of this system.
|
|
||||||
# Before changing this value read the documentation for this option
|
|
||||||
# (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
|
|
||||||
system.stateVersion = "22.11"; # Did you read the comment?
|
|
||||||
}
|
|
@ -1,21 +0,0 @@
|
|||||||
{ theFlake, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
# Prevent building a configuration without revision
|
|
||||||
rev = if theFlake ? rev then theFlake.rev
|
|
||||||
else throw ("Refusing to build from a dirty Git tree!");
|
|
||||||
in {
|
|
||||||
# Save the commit of the config in /etc/configrev
|
|
||||||
environment.etc.configrev.text = rev + "\n";
|
|
||||||
|
|
||||||
# Keep a log with the config over time
|
|
||||||
system.activationScripts.configRevLog.text = ''
|
|
||||||
BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown)
|
|
||||||
CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown)
|
|
||||||
NEXT=${rev}
|
|
||||||
DATENOW=$(date --iso-8601=seconds)
|
|
||||||
echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log
|
|
||||||
'';
|
|
||||||
|
|
||||||
system.configurationRevision = rev;
|
|
||||||
}
|
|
@ -1,22 +0,0 @@
|
|||||||
{ lib, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
keys = import ../../../keys.nix;
|
|
||||||
hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts;
|
|
||||||
in
|
|
||||||
{
|
|
||||||
# Enable the OpenSSH daemon.
|
|
||||||
services.openssh.enable = true;
|
|
||||||
|
|
||||||
# Connect to intranet git hosts via proxy
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
|
|
||||||
User git
|
|
||||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
|
||||||
'';
|
|
||||||
|
|
||||||
programs.ssh.knownHosts = hostsKeys // {
|
|
||||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
|
||||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,91 +0,0 @@
|
|||||||
{ pkgs, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
zsh-completions
|
|
||||||
nix-zsh-completions
|
|
||||||
];
|
|
||||||
|
|
||||||
programs.zsh = {
|
|
||||||
enable = true;
|
|
||||||
histSize = 1000000;
|
|
||||||
|
|
||||||
shellInit = ''
|
|
||||||
# Disable new user prompt
|
|
||||||
if [ ! -e ~/.zshrc ]; then
|
|
||||||
touch ~/.zshrc
|
|
||||||
fi
|
|
||||||
'';
|
|
||||||
|
|
||||||
promptInit = ''
|
|
||||||
# Note that to manually override this in ~/.zshrc you should run `prompt off`
|
|
||||||
# before setting your PS1, etc. Otherwise this is likely to interact with
|
|
||||||
# your ~/.zshrc configuration in unexpected ways as the default prompt sets
|
|
||||||
# a lot of different prompt variables.
|
|
||||||
autoload -U promptinit && promptinit && prompt default && setopt prompt_sp
|
|
||||||
'';
|
|
||||||
|
|
||||||
# Taken from Ulli Kehrle config:
|
|
||||||
# https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205
|
|
||||||
interactiveShellInit = ''
|
|
||||||
source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh"
|
|
||||||
|
|
||||||
# Save history immediately, but only load it when the shell starts
|
|
||||||
setopt inc_append_history
|
|
||||||
|
|
||||||
# dircolors doesn't support alacritty:
|
|
||||||
# https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html
|
|
||||||
export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:';
|
|
||||||
|
|
||||||
# From Arch Linux and GRML
|
|
||||||
bindkey "^R" history-incremental-pattern-search-backward
|
|
||||||
bindkey "^S" history-incremental-pattern-search-forward
|
|
||||||
|
|
||||||
# Auto rehash for new binaries
|
|
||||||
zstyle ':completion:*' rehash true
|
|
||||||
# show a nice menu with the matches
|
|
||||||
zstyle ':completion:*' menu yes select
|
|
||||||
|
|
||||||
bindkey '^[OA' history-substring-search-up # Up
|
|
||||||
bindkey '^[[A' history-substring-search-up # Up
|
|
||||||
|
|
||||||
bindkey '^[OB' history-substring-search-down # Down
|
|
||||||
bindkey '^[[B' history-substring-search-down # Down
|
|
||||||
|
|
||||||
bindkey '\e[1~' beginning-of-line # Home
|
|
||||||
bindkey '\e[7~' beginning-of-line # Home
|
|
||||||
bindkey '\e[H' beginning-of-line # Home
|
|
||||||
bindkey '\eOH' beginning-of-line # Home
|
|
||||||
|
|
||||||
bindkey '\e[4~' end-of-line # End
|
|
||||||
bindkey '\e[8~' end-of-line # End
|
|
||||||
bindkey '\e[F ' end-of-line # End
|
|
||||||
bindkey '\eOF' end-of-line # End
|
|
||||||
|
|
||||||
bindkey '^?' backward-delete-char # Backspace
|
|
||||||
bindkey '\e[3~' delete-char # Del
|
|
||||||
# bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del
|
|
||||||
bindkey '\e[2~' overwrite-mode # Ins
|
|
||||||
|
|
||||||
bindkey '^H' backward-kill-word # C-Backspace
|
|
||||||
|
|
||||||
bindkey '5~' kill-word # C-Del
|
|
||||||
bindkey '^[[3;5~' kill-word # C-Del
|
|
||||||
bindkey '^[[3^' kill-word # C-Del
|
|
||||||
|
|
||||||
bindkey "^[[1;5H" backward-kill-line # C-Home
|
|
||||||
bindkey "^[[7^" backward-kill-line # C-Home
|
|
||||||
|
|
||||||
bindkey "^[[1;5F" kill-line # C-End
|
|
||||||
bindkey "^[[8^" kill-line # C-End
|
|
||||||
|
|
||||||
bindkey '^[[1;5C' forward-word # C-Right
|
|
||||||
bindkey '^[0c' forward-word # C-Right
|
|
||||||
bindkey '^[[5C' forward-word # C-Right
|
|
||||||
|
|
||||||
bindkey '^[[1;5D' backward-word # C-Left
|
|
||||||
bindkey '^[0d' backward-word # C-Left
|
|
||||||
bindkey '^[[5D' backward-word # C-Left
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
}
|
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
# Use the GRUB 2 boot loader.
|
# Use the GRUB 2 boot loader.
|
||||||
boot.loader.grub.enable = true;
|
boot.loader.grub.enable = lib.mkForce true;
|
||||||
|
|
||||||
# Enable GRUB2 serial console
|
# Enable GRUB2 serial console
|
||||||
boot.loader.grub.extraConfig = ''
|
boot.loader.grub.extraConfig = ''
|
||||||
@ -11,12 +11,14 @@
|
|||||||
terminal_output --append serial
|
terminal_output --append serial
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
# Enable serial console
|
||||||
|
boot.kernelParams = [
|
||||||
|
"console=tty1"
|
||||||
|
"console=ttyS0,115200"
|
||||||
|
];
|
||||||
|
|
||||||
boot.kernel.sysctl = {
|
boot.kernel.sysctl = {
|
||||||
"kernel.perf_event_paranoid" = lib.mkDefault "-1";
|
"kernel.perf_event_paranoid" = lib.mkDefault "-1";
|
||||||
|
|
||||||
# Allow ptracing (i.e. attach with GDB) any process of the same user, see:
|
|
||||||
# https://www.kernel.org/doc/Documentation/security/Yama.txt
|
|
||||||
"kernel.yama.ptrace_scope" = "0";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
boot.kernelPackages = pkgs.linuxPackages_latest;
|
boot.kernelPackages = pkgs.linuxPackages_latest;
|
@ -6,19 +6,20 @@
|
|||||||
fsType = "ext4";
|
fsType = "ext4";
|
||||||
};
|
};
|
||||||
|
|
||||||
# Trim unused blocks weekly
|
|
||||||
services.fstrim.enable = true;
|
|
||||||
|
|
||||||
swapDevices =
|
swapDevices =
|
||||||
[ { device = "/dev/disk/by-label/swap"; }
|
[ { device = "/dev/disk/by-label/swap"; }
|
||||||
];
|
];
|
||||||
|
|
||||||
|
# Mount the home via NFS
|
||||||
|
fileSystems."/home" = {
|
||||||
|
device = "10.0.40.30:/home";
|
||||||
|
fsType = "nfs";
|
||||||
|
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||||
|
};
|
||||||
|
|
||||||
# Tracing
|
# Tracing
|
||||||
fileSystems."/sys/kernel/tracing" = {
|
fileSystems."/sys/kernel/tracing" = {
|
||||||
device = "none";
|
device = "none";
|
||||||
fsType = "tracefs";
|
fsType = "tracefs";
|
||||||
};
|
};
|
||||||
|
|
||||||
# Mount a tmpfs into /tmp
|
|
||||||
boot.tmp.useTmpfs = true;
|
|
||||||
}
|
}
|
94
m/common/main.nix
Normal file
94
m/common/main.nix
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
{ config, pkgs, nixpkgs, bscpkgs, agenix, theFlake, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
./boot.nix
|
||||||
|
./fs.nix
|
||||||
|
./hw.nix
|
||||||
|
./net.nix
|
||||||
|
./ntp.nix
|
||||||
|
./slurm.nix
|
||||||
|
./ssh.nix
|
||||||
|
./users.nix
|
||||||
|
./watchdog.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
nixpkgs.overlays = [
|
||||||
|
bscpkgs.bscOverlay
|
||||||
|
(import ../../pkgs/overlay.nix)
|
||||||
|
];
|
||||||
|
|
||||||
|
nix.nixPath = [
|
||||||
|
"nixpkgs=${nixpkgs}"
|
||||||
|
"bscpkgs=${bscpkgs}"
|
||||||
|
"jungle=${theFlake.outPath}"
|
||||||
|
];
|
||||||
|
|
||||||
|
nix.registry.nixpkgs.flake = nixpkgs;
|
||||||
|
nix.registry.bscpkgs.flake = bscpkgs;
|
||||||
|
nix.registry.jungle.flake = theFlake;
|
||||||
|
|
||||||
|
system.configurationRevision =
|
||||||
|
if theFlake ? rev
|
||||||
|
then theFlake.rev
|
||||||
|
else throw ("Refusing to build from a dirty Git tree!");
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||||
|
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
||||||
|
ncdu config.boot.kernelPackages.perf ldns
|
||||||
|
# From bscpkgs overlay
|
||||||
|
bsc.osumb
|
||||||
|
];
|
||||||
|
|
||||||
|
systemd.services."serial-getty@ttyS0" = {
|
||||||
|
enable = true;
|
||||||
|
wantedBy = [ "getty.target" ];
|
||||||
|
serviceConfig.Restart = "always";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Increase limits
|
||||||
|
security.pam.loginLimits = [
|
||||||
|
{
|
||||||
|
domain = "*";
|
||||||
|
type = "-";
|
||||||
|
item = "memlock";
|
||||||
|
value = "1048576"; # 1 GiB of mem locked
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
time.timeZone = "Europe/Madrid";
|
||||||
|
i18n.defaultLocale = "en_DK.UTF-8";
|
||||||
|
|
||||||
|
environment.variables = {
|
||||||
|
EDITOR = "vim";
|
||||||
|
VISUAL = "vim";
|
||||||
|
};
|
||||||
|
|
||||||
|
nix.settings.experimental-features = [ "nix-command" "flakes" ];
|
||||||
|
nix.settings.sandbox = "relaxed";
|
||||||
|
nix.settings.trusted-users = [ "@wheel" ];
|
||||||
|
nix.gc.automatic = true;
|
||||||
|
nix.gc.dates = "weekly";
|
||||||
|
nix.gc.options = "--delete-older-than 30d";
|
||||||
|
|
||||||
|
programs.zsh.enable = true;
|
||||||
|
programs.zsh.histSize = 100000;
|
||||||
|
|
||||||
|
programs.bash.promptInit = ''
|
||||||
|
PS1="\h\\$ "
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Copy the NixOS configuration file and link it from the resulting system
|
||||||
|
# (/run/current-system/configuration.nix). This is useful in case you
|
||||||
|
# accidentally delete configuration.nix.
|
||||||
|
#system.copySystemConfiguration = true;
|
||||||
|
|
||||||
|
# This value determines the NixOS release from which the default
|
||||||
|
# settings for stateful data, like file locations and database versions
|
||||||
|
# on your system were taken. It's perfectly fine and recommended to leave
|
||||||
|
# this value at the release version of the first install of this system.
|
||||||
|
# Before changing this value read the documentation for this option
|
||||||
|
# (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
|
||||||
|
system.stateVersion = "22.11"; # Did you read the comment?
|
||||||
|
}
|
@ -6,11 +6,12 @@
|
|||||||
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
|
enableIPv6 = false;
|
||||||
|
useDHCP = false;
|
||||||
defaultGateway = "10.0.40.30";
|
defaultGateway = "10.0.40.30";
|
||||||
nameservers = ["8.8.8.8"];
|
nameservers = ["8.8.8.8"];
|
||||||
|
|
||||||
proxy = {
|
proxy = {
|
||||||
default = "http://hut:23080/";
|
default = "http://localhost:23080/";
|
||||||
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
|
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40";
|
||||||
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
|
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
|
||||||
# https://github.com/golang/go/issues/16715
|
# https://github.com/golang/go/issues/16715
|
||||||
@ -18,23 +19,21 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
firewall = {
|
firewall = {
|
||||||
extraCommands = ''
|
enable = true;
|
||||||
# Prevent ssfhead from contacting our slurmd daemon
|
allowedTCPPorts = [ 22 ];
|
||||||
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
|
||||||
# But accept traffic to slurm ports from any other node in the subnet
|
# FIXME: For slurmd as it requests the compute nodes to connect to us
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
allowedTCPPortRanges = [ { from=1024; to=65535; } ];
|
||||||
# We also need to open the srun port range
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
|
||||||
'';
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extraHosts = ''
|
extraHosts = ''
|
||||||
10.0.40.30 ssfhead
|
10.0.40.30 ssfhead
|
||||||
|
84.88.53.236 ssfhead.bsc.es ssfhead
|
||||||
|
|
||||||
# Node Entry for node: mds01 (ID=72)
|
# Node Entry for node: mds01 (ID=72)
|
||||||
10.0.40.40 bay mds01 mds01-eth0
|
10.0.40.40 bay mds01 mds01-eth0
|
||||||
10.0.42.40 bay-ib mds01-ib0
|
10.0.42.40 mds01-ib0
|
||||||
10.0.40.141 bay-ipmi mds01-ipmi0
|
10.0.40.141 mds01-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: oss01 (ID=73)
|
# Node Entry for node: oss01 (ID=73)
|
||||||
10.0.40.41 oss01 oss01-eth0
|
10.0.40.41 oss01 oss01-eth0
|
||||||
@ -43,18 +42,18 @@
|
|||||||
|
|
||||||
# Node Entry for node: oss02 (ID=74)
|
# Node Entry for node: oss02 (ID=74)
|
||||||
10.0.40.42 lake2 oss02 oss02-eth0
|
10.0.40.42 lake2 oss02 oss02-eth0
|
||||||
10.0.42.42 lake2-ib oss02-ib0
|
10.0.42.42 oss02-ib0
|
||||||
10.0.40.143 lake2-ipmi oss02-ipmi0
|
10.0.40.143 oss02-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon01 (ID=15)
|
# Node Entry for node: xeon01 (ID=15)
|
||||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
10.0.40.1 owl1 xeon01 xeon01-eth0
|
||||||
10.0.42.1 owl1-ib xeon01-ib0
|
10.0.42.1 xeon01-ib0
|
||||||
10.0.40.101 owl1-ipmi xeon01-ipmi0
|
10.0.40.101 xeon01-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon02 (ID=16)
|
# Node Entry for node: xeon02 (ID=16)
|
||||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
10.0.40.2 owl2 xeon02 xeon02-eth0
|
||||||
10.0.42.2 owl2-ib xeon02-ib0
|
10.0.42.2 xeon02-ib0
|
||||||
10.0.40.102 owl2-ipmi xeon02-ipmi0
|
10.0.40.102 xeon02-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon03 (ID=17)
|
# Node Entry for node: xeon03 (ID=17)
|
||||||
10.0.40.3 xeon03 xeon03-eth0
|
10.0.40.3 xeon03 xeon03-eth0
|
||||||
@ -68,8 +67,8 @@
|
|||||||
|
|
||||||
# Node Entry for node: xeon05 (ID=19)
|
# Node Entry for node: xeon05 (ID=19)
|
||||||
10.0.40.5 koro xeon05 xeon05-eth0
|
10.0.40.5 koro xeon05 xeon05-eth0
|
||||||
10.0.42.5 koro-ib xeon05-ib0
|
10.0.42.5 xeon05-ib0
|
||||||
10.0.40.105 koro-ipmi xeon05-ipmi0
|
10.0.40.105 xeon05-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon06 (ID=20)
|
# Node Entry for node: xeon06 (ID=20)
|
||||||
10.0.40.6 xeon06 xeon06-eth0
|
10.0.40.6 xeon06 xeon06-eth0
|
||||||
@ -78,13 +77,13 @@
|
|||||||
|
|
||||||
# Node Entry for node: xeon07 (ID=21)
|
# Node Entry for node: xeon07 (ID=21)
|
||||||
10.0.40.7 hut xeon07 xeon07-eth0
|
10.0.40.7 hut xeon07 xeon07-eth0
|
||||||
10.0.42.7 hut-ib xeon07-ib0
|
10.0.42.7 xeon07-ib0
|
||||||
10.0.40.107 hut-ipmi xeon07-ipmi0
|
10.0.40.107 xeon07-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon08 (ID=22)
|
# Node Entry for node: xeon08 (ID=22)
|
||||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
10.0.40.8 eudy xeon08 xeon08-eth0
|
||||||
10.0.42.8 eudy-ib xeon08-ib0
|
10.0.42.8 xeon08-ib0
|
||||||
10.0.40.108 eudy-ipmi xeon08-ipmi0
|
10.0.40.108 xeon08-ipmi0
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
}
|
}
|
42
m/common/slurm.nix
Normal file
42
m/common/slurm.nix
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{ lib, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
systemd.services.slurmd.serviceConfig = {
|
||||||
|
# Kill all processes in the control group on stop/restart. This will kill
|
||||||
|
# all the jobs running, so ensure that we only upgrade when the nodes are
|
||||||
|
# not in use. See:
|
||||||
|
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
||||||
|
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||||
|
KillMode = lib.mkForce "control-group";
|
||||||
|
};
|
||||||
|
services.slurm = {
|
||||||
|
client.enable = true;
|
||||||
|
controlMachine = "hut";
|
||||||
|
clusterName = "jungle";
|
||||||
|
nodeName = [
|
||||||
|
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||||
|
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
||||||
|
];
|
||||||
|
|
||||||
|
# See slurm.conf(5) for more details about these options.
|
||||||
|
extraConfig = ''
|
||||||
|
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||||
|
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
||||||
|
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
||||||
|
# library in SLURM (--mpi=pmix). See more details here:
|
||||||
|
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
||||||
|
MpiDefault=pmix
|
||||||
|
|
||||||
|
# When a node reboots return that node to the slurm queue as soon as it
|
||||||
|
# becomes operative again.
|
||||||
|
ReturnToService=2
|
||||||
|
|
||||||
|
# Track all processes by using a cgroup
|
||||||
|
ProctrackType=proctrack/cgroup
|
||||||
|
|
||||||
|
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||||
|
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||||
|
TaskPlugin=task/affinity,task/cgroup
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
24
m/common/ssh.nix
Normal file
24
m/common/ssh.nix
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
# Enable the OpenSSH daemon.
|
||||||
|
services.openssh.enable = true;
|
||||||
|
|
||||||
|
# Connect to intranet git hosts via proxy
|
||||||
|
programs.ssh.extraConfig = ''
|
||||||
|
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
|
||||||
|
User git
|
||||||
|
ProxyCommand nc -X connect -x localhost:23080 %h %p
|
||||||
|
'';
|
||||||
|
|
||||||
|
programs.ssh.knownHosts = {
|
||||||
|
"hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1";
|
||||||
|
"owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv";
|
||||||
|
"owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK";
|
||||||
|
"eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG";
|
||||||
|
"koro".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67";
|
||||||
|
|
||||||
|
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||||
|
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||||
|
};
|
||||||
|
}
|
@ -1,10 +1,6 @@
|
|||||||
{ pkgs, ... }:
|
{ ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
|
||||||
../../module/jungle-users.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
users = {
|
users = {
|
||||||
mutableUsers = false;
|
mutableUsers = false;
|
||||||
users = {
|
users = {
|
||||||
@ -30,7 +26,6 @@
|
|||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal"
|
||||||
];
|
];
|
||||||
shell = pkgs.zsh;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
arocanon = {
|
arocanon = {
|
||||||
@ -46,73 +41,18 @@
|
|||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdphWxLAEekicZ/WBrvP7phMyxKSSuLAZBovNX+hZXQ aleix@kerneland"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
};
|
|
||||||
|
|
||||||
jungleUsers = {
|
|
||||||
rpenacob = {
|
rpenacob = {
|
||||||
uid = 2761;
|
uid = 2761;
|
||||||
isNormalUser = true;
|
isNormalUser = true;
|
||||||
home = "/home/Computational/rpenacob";
|
home = "/home/Computational/rpenacob";
|
||||||
description = "Raúl Peñacoba";
|
description = "Raúl Peñacoba";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "owl1" "owl2" "hut" ];
|
|
||||||
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
anavarro = {
|
|
||||||
uid = 1037;
|
|
||||||
isNormalUser = true;
|
|
||||||
home = "/home/Computational/anavarro";
|
|
||||||
description = "Antoni Navarro";
|
|
||||||
group = "Computational";
|
|
||||||
hosts = [ "hut" "raccoon" ];
|
|
||||||
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
|
||||||
openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
abonerib = {
|
|
||||||
uid = 4541;
|
|
||||||
isNormalUser = true;
|
|
||||||
home = "/home/Computational/abonerib";
|
|
||||||
description = "Aleix Boné";
|
|
||||||
group = "Computational";
|
|
||||||
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
|
|
||||||
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
|
||||||
openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
vlopez = {
|
|
||||||
uid = 4334;
|
|
||||||
isNormalUser = true;
|
|
||||||
home = "/home/Computational/vlopez";
|
|
||||||
description = "Victor López";
|
|
||||||
group = "Computational";
|
|
||||||
hosts = [ "koro" ];
|
|
||||||
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
|
||||||
openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
dbautist = {
|
|
||||||
uid = 5649;
|
|
||||||
isNormalUser = true;
|
|
||||||
home = "/home/Computational/dbautist";
|
|
||||||
description = "Dylan Bautista Cases";
|
|
||||||
group = "Computational";
|
|
||||||
hosts = [ "hut" ];
|
|
||||||
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
|
||||||
openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
groups = {
|
groups = {
|
@ -1,9 +0,0 @@
|
|||||||
{
|
|
||||||
# Provides the base system for a xeon node.
|
|
||||||
imports = [
|
|
||||||
./base.nix
|
|
||||||
./xeon/fs.nix
|
|
||||||
./xeon/console.nix
|
|
||||||
./xeon/net.nix
|
|
||||||
];
|
|
||||||
}
|
|
@ -1,14 +0,0 @@
|
|||||||
{
|
|
||||||
# Restart the serial console
|
|
||||||
systemd.services."serial-getty@ttyS0" = {
|
|
||||||
enable = true;
|
|
||||||
wantedBy = [ "getty.target" ];
|
|
||||||
serviceConfig.Restart = "always";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Enable serial console
|
|
||||||
boot.kernelParams = [
|
|
||||||
"console=tty1"
|
|
||||||
"console=ttyS0,115200"
|
|
||||||
];
|
|
||||||
}
|
|
@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
# Mount the home via NFS
|
|
||||||
fileSystems."/home" = {
|
|
||||||
device = "10.0.40.30:/home";
|
|
||||||
fsType = "nfs";
|
|
||||||
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
|
||||||
};
|
|
||||||
}
|
|
@ -2,14 +2,14 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/main.nix
|
||||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||||
|
|
||||||
./kernel/kernel.nix
|
./kernel/kernel.nix
|
||||||
./cpufreq.nix
|
./cpufreq.nix
|
||||||
./fs.nix
|
./fs.nix
|
||||||
./users.nix
|
./users.nix
|
||||||
../module/debuginfod.nix
|
./slurm.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select this using the ID to avoid mismatches
|
# Select this using the ID to avoid mismatches
|
||||||
|
@ -21,9 +21,9 @@ let
|
|||||||
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
|
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
|
||||||
#};
|
#};
|
||||||
|
|
||||||
kernel = nixos-fcs;
|
kernel = nixos-fcsv3;
|
||||||
|
|
||||||
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
||||||
version = "6.2.8";
|
version = "6.2.8";
|
||||||
src = builtins.fetchGit {
|
src = builtins.fetchGit {
|
||||||
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||||
@ -40,13 +40,35 @@ let
|
|||||||
};
|
};
|
||||||
kernelPatches = [];
|
kernelPatches = [];
|
||||||
extraMeta.branch = lib.versions.majorMinor version;
|
extraMeta.branch = lib.versions.majorMinor version;
|
||||||
}));
|
});
|
||||||
|
|
||||||
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";};
|
nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";};
|
||||||
nixos-fcs-lockstat = nixos-fcs.override {
|
nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";};
|
||||||
|
nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";};
|
||||||
|
|
||||||
|
# always use fcs_sched_setaffinity
|
||||||
|
#nixos-debug = nixos-fcs-kernel {gitCommit = "7d0bf285fca92badc8df3c9907a9ab30db4418aa";};
|
||||||
|
# remove need_check_cgroup
|
||||||
|
#nixos-debug = nixos-fcs-kernel {gitCommit = "4cc4efaab5e4a0bfa3089e935215b981c1922919";};
|
||||||
|
# merge again fcs_wake and fcs_wait
|
||||||
|
#nixos-debug = nixos-fcs-kernel {gitCommit = "40c6f72f4ae54b0b636b193ac0648fb5730c810d";};
|
||||||
|
# start from scratch, this is the working version with split fcs_wake and fcs_wait
|
||||||
|
nixos-debug = nixos-fcs-kernel {gitCommit = "c9a39d6a4ca83845b4e71fcc268fb0a76aff1bdf"; branch = "fcs-test"; };
|
||||||
|
|
||||||
|
nixos-fcsv1-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";
|
||||||
lockStat = true;
|
lockStat = true;
|
||||||
};
|
};
|
||||||
nixos-fcs-lockstat-preempt = nixos-fcs.override {
|
nixos-fcsv2-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";
|
||||||
|
lockStat = true;
|
||||||
|
};
|
||||||
|
nixos-fcsv3-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
|
||||||
|
lockStat = true;
|
||||||
|
};
|
||||||
|
nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel {
|
||||||
|
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
|
||||||
lockStat = true;
|
lockStat = true;
|
||||||
preempt = true;
|
preempt = true;
|
||||||
};
|
};
|
||||||
|
7
m/eudy/slurm.nix
Normal file
7
m/eudy/slurm.nix
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{ lib, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
services.slurm = {
|
||||||
|
client.enable = lib.mkForce false;
|
||||||
|
};
|
||||||
|
}
|
@ -1,162 +0,0 @@
|
|||||||
modules:
|
|
||||||
http_2xx:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
proxy_url: "http://127.0.0.1:23080"
|
|
||||||
skip_resolve_phase_with_proxy: true
|
|
||||||
follow_redirects: true
|
|
||||||
valid_status_codes: [] # Defaults to 2xx
|
|
||||||
method: GET
|
|
||||||
http_with_proxy:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
proxy_url: "http://127.0.0.1:3128"
|
|
||||||
skip_resolve_phase_with_proxy: true
|
|
||||||
http_with_proxy_and_headers:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
proxy_url: "http://127.0.0.1:3128"
|
|
||||||
proxy_connect_header:
|
|
||||||
Proxy-Authorization:
|
|
||||||
- Bearer token
|
|
||||||
http_post_2xx:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Content-Type: application/json
|
|
||||||
body: '{}'
|
|
||||||
http_post_body_file:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
body_file: "/files/body.txt"
|
|
||||||
http_basic_auth_example:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Host: "login.example.com"
|
|
||||||
basic_auth:
|
|
||||||
username: "username"
|
|
||||||
password: "mysecret"
|
|
||||||
http_2xx_oauth_client_credentials:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
|
||||||
follow_redirects: true
|
|
||||||
preferred_ip_protocol: "ip4"
|
|
||||||
valid_status_codes:
|
|
||||||
- 200
|
|
||||||
- 201
|
|
||||||
oauth2:
|
|
||||||
client_id: "client_id"
|
|
||||||
client_secret: "client_secret"
|
|
||||||
token_url: "https://api.example.com/token"
|
|
||||||
endpoint_params:
|
|
||||||
grant_type: "client_credentials"
|
|
||||||
http_custom_ca_example:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
|
||||||
tls_config:
|
|
||||||
ca_file: "/certs/my_cert.crt"
|
|
||||||
http_gzip:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
|
||||||
compression: gzip
|
|
||||||
http_gzip_with_accept_encoding:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
|
||||||
compression: gzip
|
|
||||||
headers:
|
|
||||||
Accept-Encoding: gzip
|
|
||||||
tls_connect:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
tls: true
|
|
||||||
tcp_connect_example:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
imap_starttls:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- expect: "OK.*STARTTLS"
|
|
||||||
- send: ". STARTTLS"
|
|
||||||
- expect: "OK"
|
|
||||||
- starttls: true
|
|
||||||
- send: ". capability"
|
|
||||||
- expect: "CAPABILITY IMAP4rev1"
|
|
||||||
smtp_starttls:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- expect: "^220 ([^ ]+) ESMTP (.+)$"
|
|
||||||
- send: "EHLO prober\r"
|
|
||||||
- expect: "^250-STARTTLS"
|
|
||||||
- send: "STARTTLS\r"
|
|
||||||
- expect: "^220"
|
|
||||||
- starttls: true
|
|
||||||
- send: "EHLO prober\r"
|
|
||||||
- expect: "^250-AUTH"
|
|
||||||
- send: "QUIT\r"
|
|
||||||
irc_banner_example:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- send: "NICK prober"
|
|
||||||
- send: "USER prober prober prober :prober"
|
|
||||||
- expect: "PING :([^ ]+)"
|
|
||||||
send: "PONG ${1}"
|
|
||||||
- expect: "^:[^ ]+ 001"
|
|
||||||
icmp:
|
|
||||||
prober: icmp
|
|
||||||
timeout: 5s
|
|
||||||
icmp:
|
|
||||||
preferred_ip_protocol: "ip4"
|
|
||||||
dns_udp_example:
|
|
||||||
prober: dns
|
|
||||||
timeout: 5s
|
|
||||||
dns:
|
|
||||||
query_name: "www.prometheus.io"
|
|
||||||
query_type: "A"
|
|
||||||
valid_rcodes:
|
|
||||||
- NOERROR
|
|
||||||
validate_answer_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
fail_if_all_match_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
fail_if_not_matches_regexp:
|
|
||||||
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
|
|
||||||
fail_if_none_matches_regexp:
|
|
||||||
- "127.0.0.1"
|
|
||||||
validate_authority_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
validate_additional_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
dns_soa:
|
|
||||||
prober: dns
|
|
||||||
dns:
|
|
||||||
query_name: "prometheus.io"
|
|
||||||
query_type: "SOA"
|
|
||||||
dns_tcp_example:
|
|
||||||
prober: dns
|
|
||||||
dns:
|
|
||||||
transport_protocol: "tcp" # defaults to "udp"
|
|
||||||
preferred_ip_protocol: "ip4" # defaults to "ip6"
|
|
||||||
query_name: "www.prometheus.io"
|
|
@ -1,8 +1,8 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
# Mounts the /ceph filesystem at boot
|
|
||||||
{
|
{
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
|
ceph
|
||||||
ceph-client
|
ceph-client
|
||||||
fio # For benchmarks
|
fio # For benchmarks
|
||||||
];
|
];
|
||||||
@ -11,14 +11,14 @@
|
|||||||
# modprobe command.
|
# modprobe command.
|
||||||
boot.kernelModules = [ "ceph" ];
|
boot.kernelModules = [ "ceph" ];
|
||||||
|
|
||||||
age.secrets.cephUser.file = ../../secrets/ceph-user.age;
|
age.secrets."secrets/ceph-user".file = ./secrets/ceph-user.age;
|
||||||
|
|
||||||
fileSystems."/ceph" = {
|
fileSystems."/ceph" = {
|
||||||
fsType = "ceph";
|
fsType = "ceph";
|
||||||
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
||||||
options = [
|
options = [
|
||||||
"mon_addr=10.0.40.40"
|
"mon_addr=10.0.42.40"
|
||||||
"secretfile=${config.age.secrets.cephUser.path}"
|
"secretfile=${config.age.secrets."secrets/ceph-user".path}"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -1,23 +1,20 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, agenix, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/main.nix
|
||||||
|
|
||||||
../module/ceph.nix
|
|
||||||
../module/debuginfod.nix
|
|
||||||
../module/emulation.nix
|
|
||||||
../module/slurm-client.nix
|
|
||||||
./gitlab-runner.nix
|
./gitlab-runner.nix
|
||||||
./monitoring.nix
|
./monitoring.nix
|
||||||
./nfs.nix
|
./nfs.nix
|
||||||
./slurm-server.nix
|
./slurm-daemon.nix
|
||||||
./nix-serve.nix
|
./ceph.nix
|
||||||
./public-inbox.nix
|
|
||||||
./gitea.nix
|
|
||||||
./msmtp.nix
|
|
||||||
./postgresql.nix
|
|
||||||
#./pxe.nix
|
#./pxe.nix
|
||||||
|
agenix.nixosModules.default
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = [
|
||||||
|
agenix.packages.x86_64-linux.default
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select this using the ID to avoid mismatches
|
# Select this using the ID to avoid mismatches
|
||||||
@ -33,15 +30,5 @@
|
|||||||
address = "10.0.42.7";
|
address = "10.0.42.7";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
firewall = {
|
|
||||||
extraCommands = ''
|
|
||||||
# Accept all proxy traffic from compute nodes but not the login
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.30 --dport 23080 -j nixos-fw-log-refuse
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 23080 -j nixos-fw-accept
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
# Allow proxy to bind to the ethernet interface
|
|
||||||
services.openssh.settings.GatewayPorts = "clientspecified";
|
|
||||||
}
|
}
|
||||||
|
@ -1,63 +0,0 @@
|
|||||||
{ config, lib, ... }:
|
|
||||||
{
|
|
||||||
age.secrets.giteaRunnerToken.file = ../../secrets/gitea-runner-token.age;
|
|
||||||
|
|
||||||
services.gitea = {
|
|
||||||
enable = true;
|
|
||||||
appName = "Gitea in the jungle";
|
|
||||||
|
|
||||||
settings = {
|
|
||||||
server = {
|
|
||||||
ROOT_URL = "https://jungle.bsc.es/git/";
|
|
||||||
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
|
|
||||||
LANDING_PAGE = "explore";
|
|
||||||
};
|
|
||||||
metrics.ENABLED = true;
|
|
||||||
service = {
|
|
||||||
REGISTER_MANUAL_CONFIRM = true;
|
|
||||||
ENABLE_NOTIFY_MAIL = true;
|
|
||||||
};
|
|
||||||
log.LEVEL = "Warn";
|
|
||||||
|
|
||||||
mailer = {
|
|
||||||
ENABLED = true;
|
|
||||||
FROM = "jungle-robot@bsc.es";
|
|
||||||
PROTOCOL = "sendmail";
|
|
||||||
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
|
|
||||||
SENDMAIL_ARGS = "--";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
services.gitea-actions-runner.instances = {
|
|
||||||
runrun = {
|
|
||||||
enable = true;
|
|
||||||
name = "runrun";
|
|
||||||
url = "https://jungle.bsc.es/git/";
|
|
||||||
tokenFile = config.age.secrets.giteaRunnerToken.path;
|
|
||||||
labels = [ "native:host" ];
|
|
||||||
settings.runner.capacity = 8;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
systemd.services.gitea-runner-runrun = {
|
|
||||||
path = [ "/run/current-system/sw" ];
|
|
||||||
serviceConfig = {
|
|
||||||
# DynamicUser doesn't work well with SSH
|
|
||||||
DynamicUser = lib.mkForce false;
|
|
||||||
User = "gitea-runner";
|
|
||||||
Group = "gitea-runner";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
users.users.gitea-runner = {
|
|
||||||
isSystemUser = true;
|
|
||||||
home = "/var/lib/gitea-runner";
|
|
||||||
description = "Gitea Runner";
|
|
||||||
group = "gitea-runner";
|
|
||||||
extraGroups = [ "docker" ];
|
|
||||||
createHome = true;
|
|
||||||
};
|
|
||||||
users.groups.gitea-runner = {};
|
|
||||||
}
|
|
||||||
|
|
@ -1,37 +1,43 @@
|
|||||||
{ pkgs, lib, config, ... }:
|
{ pkgs, lib, config, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
|
age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age;
|
||||||
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
|
age.secrets."secrets/nosv-token".file = ./secrets/nosv-token.age;
|
||||||
|
|
||||||
services.gitlab-runner = {
|
services.gitlab-runner = {
|
||||||
enable = true;
|
enable = true;
|
||||||
settings.concurrent = 5;
|
settings.concurrent = 5;
|
||||||
services = let
|
services = {
|
||||||
common-shell = {
|
ovni-shell = {
|
||||||
|
registrationConfigFile = config.age.secrets."secrets/ovni-token".path;
|
||||||
executor = "shell";
|
executor = "shell";
|
||||||
|
tagList = [ "nix" "xeon" ];
|
||||||
environmentVariables = {
|
environmentVariables = {
|
||||||
SHELL = "${pkgs.bash}/bin/bash";
|
SHELL = "${pkgs.bash}/bin/bash";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
common-docker = {
|
ovni-docker = {
|
||||||
executor = "docker";
|
registrationConfigFile = config.age.secrets."secrets/ovni-token".path;
|
||||||
dockerImage = "debian:stable";
|
dockerImage = "debian:stable";
|
||||||
registrationFlags = [
|
tagList = [ "docker" "xeon" ];
|
||||||
"--docker-network-mode host"
|
registrationFlags = [ "--docker-network-mode host" ];
|
||||||
];
|
|
||||||
environmentVariables = {
|
environmentVariables = {
|
||||||
https_proxy = "http://localhost:23080";
|
https_proxy = "http://localhost:23080";
|
||||||
http_proxy = "http://localhost:23080";
|
http_proxy = "http://localhost:23080";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
in {
|
nosv-docker = {
|
||||||
# For pm.bsc.es/gitlab
|
registrationConfigFile = config.age.secrets."secrets/nosv-token".path;
|
||||||
gitlab-pm-shell = common-shell // {
|
dockerImage = "debian:stable";
|
||||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
|
tagList = [ "docker" "xeon" ];
|
||||||
};
|
registrationFlags = [
|
||||||
gitlab-pm-docker = common-docker // {
|
"--docker-network-mode host"
|
||||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
|
"--docker-cpus 56"
|
||||||
|
];
|
||||||
|
environmentVariables = {
|
||||||
|
https_proxy = "http://localhost:23080";
|
||||||
|
http_proxy = "http://localhost:23080";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -1,14 +1,6 @@
|
|||||||
{ config, lib, ... }:
|
{ config, lib, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [ ../module/slurm-exporter.nix ];
|
|
||||||
|
|
||||||
age.secrets.grafanaJungleRobotPassword = {
|
|
||||||
file = ../../secrets/jungle-robot-password.age;
|
|
||||||
owner = "grafana";
|
|
||||||
mode = "400";
|
|
||||||
};
|
|
||||||
|
|
||||||
services.grafana = {
|
services.grafana = {
|
||||||
enable = true;
|
enable = true;
|
||||||
settings = {
|
settings = {
|
||||||
@ -19,30 +11,14 @@
|
|||||||
http_port = 2342;
|
http_port = 2342;
|
||||||
http_addr = "127.0.0.1";
|
http_addr = "127.0.0.1";
|
||||||
};
|
};
|
||||||
smtp = {
|
|
||||||
enabled = true;
|
|
||||||
from_address = "jungle-robot@bsc.es";
|
|
||||||
user = "jungle-robot";
|
|
||||||
# Read the password from a file, which is only readable by grafana user
|
|
||||||
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
|
|
||||||
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
|
|
||||||
host = "mail.bsc.es:465";
|
|
||||||
startTLS_policy = "NoStartTLS";
|
|
||||||
};
|
|
||||||
feature_toggles.publicDashboards = true;
|
feature_toggles.publicDashboards = true;
|
||||||
"auth.anonymous".enabled = true;
|
|
||||||
log.level = "warn";
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# Make grafana alerts also use the proxy
|
|
||||||
systemd.services.grafana.environment = config.networking.proxy.envVars;
|
|
||||||
|
|
||||||
services.prometheus = {
|
services.prometheus = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 9001;
|
port = 9001;
|
||||||
retentionTime = "1y";
|
retentionTime = "1y";
|
||||||
listenAddress = "127.0.0.1";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
|
systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
|
||||||
@ -72,23 +48,13 @@
|
|||||||
user = "root";
|
user = "root";
|
||||||
configFile = ./ipmi.yml;
|
configFile = ./ipmi.yml;
|
||||||
#extraFlags = [ "--log.level=debug" ];
|
#extraFlags = [ "--log.level=debug" ];
|
||||||
listenAddress = "127.0.0.1";
|
|
||||||
};
|
};
|
||||||
node = {
|
node = {
|
||||||
enable = true;
|
enable = true;
|
||||||
enabledCollectors = [ "systemd" ];
|
enabledCollectors = [ "systemd" ];
|
||||||
port = 9002;
|
port = 9002;
|
||||||
listenAddress = "127.0.0.1";
|
|
||||||
};
|
|
||||||
smartctl = {
|
|
||||||
enable = true;
|
|
||||||
listenAddress = "127.0.0.1";
|
|
||||||
};
|
|
||||||
blackbox = {
|
|
||||||
enable = true;
|
|
||||||
listenAddress = "127.0.0.1";
|
|
||||||
configFile = ./blackbox.yml;
|
|
||||||
};
|
};
|
||||||
|
smartctl.enable = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
@ -101,8 +67,6 @@
|
|||||||
"127.0.0.1:9323"
|
"127.0.0.1:9323"
|
||||||
"127.0.0.1:9252"
|
"127.0.0.1:9252"
|
||||||
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
||||||
"127.0.0.1:9341" # Slurm exporter
|
|
||||||
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
|
||||||
];
|
];
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
@ -116,71 +80,6 @@
|
|||||||
];
|
];
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
{
|
|
||||||
job_name = "blackbox-http";
|
|
||||||
metrics_path = "/probe";
|
|
||||||
params = { module = [ "http_2xx" ]; };
|
|
||||||
static_configs = [{
|
|
||||||
targets = [
|
|
||||||
"https://www.google.com/robots.txt"
|
|
||||||
"https://pm.bsc.es/"
|
|
||||||
"https://pm.bsc.es/gitlab/"
|
|
||||||
"https://jungle.bsc.es/"
|
|
||||||
"https://gitlab.bsc.es/"
|
|
||||||
];
|
|
||||||
}];
|
|
||||||
relabel_configs = [
|
|
||||||
{
|
|
||||||
# Takes the address and sets it in the "target=<xyz>" URL parameter
|
|
||||||
source_labels = [ "__address__" ];
|
|
||||||
target_label = "__param_target";
|
|
||||||
}
|
|
||||||
{
|
|
||||||
# Sets the "instance" label with the remote host we are querying
|
|
||||||
source_labels = [ "__param_target" ];
|
|
||||||
target_label = "instance";
|
|
||||||
}
|
|
||||||
{
|
|
||||||
# Shows the host target address instead of the blackbox address
|
|
||||||
target_label = "__address__";
|
|
||||||
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
{
|
|
||||||
job_name = "blackbox-icmp";
|
|
||||||
metrics_path = "/probe";
|
|
||||||
params = { module = [ "icmp" ]; };
|
|
||||||
static_configs = [{
|
|
||||||
targets = [
|
|
||||||
"1.1.1.1"
|
|
||||||
"8.8.8.8"
|
|
||||||
"ssfhead"
|
|
||||||
"anella-bsc.cesca.cat"
|
|
||||||
];
|
|
||||||
}];
|
|
||||||
relabel_configs = [
|
|
||||||
{
|
|
||||||
# Takes the address and sets it in the "target=<xyz>" URL parameter
|
|
||||||
source_labels = [ "__address__" ];
|
|
||||||
target_label = "__param_target";
|
|
||||||
}
|
|
||||||
{
|
|
||||||
# Sets the "instance" label with the remote host we are querying
|
|
||||||
source_labels = [ "__param_target" ];
|
|
||||||
target_label = "instance";
|
|
||||||
}
|
|
||||||
{
|
|
||||||
# Shows the host target address instead of the blackbox address
|
|
||||||
target_label = "__address__";
|
|
||||||
replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
{
|
|
||||||
job_name = "gitea";
|
|
||||||
static_configs = [{ targets = [ "127.0.0.1:3000" ]; }];
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
# Scrape the IPMI info of the hosts remotely via LAN
|
# Scrape the IPMI info of the hosts remotely via LAN
|
||||||
job_name = "ipmi-lan";
|
job_name = "ipmi-lan";
|
||||||
@ -233,17 +132,6 @@
|
|||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
{
|
|
||||||
job_name = "ipmi-raccoon";
|
|
||||||
metrics_path = "/ipmi";
|
|
||||||
static_configs = [
|
|
||||||
{ targets = [ "127.0.0.1:9291" ]; }
|
|
||||||
];
|
|
||||||
params = {
|
|
||||||
target = [ "84.88.51.142" ];
|
|
||||||
module = [ "raccoon" ];
|
|
||||||
};
|
|
||||||
}
|
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
{ config, lib, ... }:
|
|
||||||
{
|
|
||||||
age.secrets.jungleRobotPassword = {
|
|
||||||
file = ../../secrets/jungle-robot-password.age;
|
|
||||||
group = "gitea";
|
|
||||||
mode = "440";
|
|
||||||
};
|
|
||||||
|
|
||||||
programs.msmtp = {
|
|
||||||
enable = true;
|
|
||||||
accounts = {
|
|
||||||
default = {
|
|
||||||
auth = true;
|
|
||||||
tls = true;
|
|
||||||
tls_starttls = false;
|
|
||||||
port = 465;
|
|
||||||
host = "mail.bsc.es";
|
|
||||||
user = "jungle-robot";
|
|
||||||
passwordeval = "cat ${config.age.secrets.jungleRobotPassword.path}";
|
|
||||||
from = "jungle-robot@bsc.es";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,16 +0,0 @@
|
|||||||
{ config, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
|
|
||||||
|
|
||||||
services.nix-serve = {
|
|
||||||
enable = true;
|
|
||||||
# Only listen locally, as we serve it via ssh
|
|
||||||
bindAddress = "127.0.0.1";
|
|
||||||
port = 5000;
|
|
||||||
|
|
||||||
secretKeyFile = config.age.secrets.nixServe.path;
|
|
||||||
# Public key:
|
|
||||||
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,19 +0,0 @@
|
|||||||
{ lib, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
services.postgresql = {
|
|
||||||
enable = true;
|
|
||||||
ensureDatabases = [ "perftestsdb" ];
|
|
||||||
ensureUsers = [
|
|
||||||
{ name = "anavarro"; ensureClauses.superuser = true; }
|
|
||||||
{ name = "rarias"; ensureClauses.superuser = true; }
|
|
||||||
{ name = "grafana"; }
|
|
||||||
];
|
|
||||||
authentication = ''
|
|
||||||
#type database DBuser auth-method
|
|
||||||
local perftestsdb rarias trust
|
|
||||||
local perftestsdb anavarro trust
|
|
||||||
local perftestsdb grafana trust
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,79 +0,0 @@
|
|||||||
/*
|
|
||||||
* CC0-1.0 <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
|
|
||||||
* Dark color scheme using 216 web-safe colors, inspired
|
|
||||||
* somewhat by the default color scheme in mutt.
|
|
||||||
* It reduces eyestrain for me, and energy usage for all:
|
|
||||||
* https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
|
|
||||||
*/
|
|
||||||
|
|
||||||
* {
|
|
||||||
font-size: 14px;
|
|
||||||
font-family: monospace;
|
|
||||||
}
|
|
||||||
|
|
||||||
pre {
|
|
||||||
white-space: pre-wrap;
|
|
||||||
padding: 10px;
|
|
||||||
background: #f5f5f5;
|
|
||||||
}
|
|
||||||
|
|
||||||
hr {
|
|
||||||
margin: 30px 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
body {
|
|
||||||
max-width: 120ex; /* 120 columns wide */
|
|
||||||
margin: 50px auto;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Underlined links add visual noise which make them hard-to-read.
|
|
||||||
* Use colors to make them stand out, instead.
|
|
||||||
*/
|
|
||||||
a:link {
|
|
||||||
color: #007;
|
|
||||||
text-decoration: none;
|
|
||||||
}
|
|
||||||
a:visited {
|
|
||||||
color:#504;
|
|
||||||
}
|
|
||||||
a:hover {
|
|
||||||
text-decoration: underline;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* quoted text in emails gets a different color */
|
|
||||||
*.q { color:gray }
|
|
||||||
|
|
||||||
/*
|
|
||||||
* these may be used with cgit <https://git.zx2c4.com/cgit/>, too.
|
|
||||||
* (cgit uses <div>, public-inbox uses <span>)
|
|
||||||
*/
|
|
||||||
*.add { color:darkgreen } /* diff post-image lines */
|
|
||||||
*.del { color:darkred } /* diff pre-image lines */
|
|
||||||
*.head { color:black } /* diff header (metainformation) */
|
|
||||||
*.hunk { color:gray } /* diff hunk-header */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* highlight 3.x colors (tested 3.18) for displaying blobs.
|
|
||||||
* This doesn't use most of the colors available, as I find too
|
|
||||||
* many colors overwhelming, so the default is commented out.
|
|
||||||
*/
|
|
||||||
.hl.num { color:#f30 } /* number */
|
|
||||||
.hl.esc { color:#f0f } /* escape character */
|
|
||||||
.hl.str { color:#f30 } /* string */
|
|
||||||
.hl.ppc { color:#f0f } /* preprocessor */
|
|
||||||
.hl.pps { color:#f30 } /* preprocessor string */
|
|
||||||
.hl.slc { color:#09f } /* single-line comment */
|
|
||||||
.hl.com { color:#09f } /* multi-line comment */
|
|
||||||
/* .hl.opt { color:#ccc } */ /* operator */
|
|
||||||
/* .hl.ipl { color:#ccc } */ /* interpolation */
|
|
||||||
|
|
||||||
/* keyword groups kw[a-z] */
|
|
||||||
.hl.kwa { color:#ff0 }
|
|
||||||
.hl.kwb { color:#0f0 }
|
|
||||||
.hl.kwc { color:#ff0 }
|
|
||||||
/* .hl.kwd { color:#ccc } */
|
|
||||||
|
|
||||||
/* line-number (unused by public-inbox) */
|
|
||||||
/* .hl.lin { color:#ccc } */
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
|||||||
{ lib, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
services.public-inbox = {
|
|
||||||
enable = true;
|
|
||||||
http = {
|
|
||||||
enable = true;
|
|
||||||
port = 8081;
|
|
||||||
mounts = [ "/lists" ];
|
|
||||||
};
|
|
||||||
settings.publicinbox = {
|
|
||||||
css = [ "${./public-inbox.css}" ];
|
|
||||||
wwwlisting = "all";
|
|
||||||
};
|
|
||||||
inboxes = {
|
|
||||||
bscpkgs = {
|
|
||||||
url = "https://jungle.bsc.es/lists/bscpkgs";
|
|
||||||
address = [ "~rodarima/bscpkgs@lists.sr.ht" ];
|
|
||||||
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
|
|
||||||
description = "Patches for bscpkgs";
|
|
||||||
listid = "~rodarima/bscpkgs.lists.sr.ht";
|
|
||||||
};
|
|
||||||
jungle = {
|
|
||||||
url = "https://jungle.bsc.es/lists/jungle";
|
|
||||||
address = [ "~rodarima/jungle@lists.sr.ht" ];
|
|
||||||
watch = [ "imaps://jungle-robot%40gmx.com@imap.gmx.com/INBOX" ];
|
|
||||||
description = "Patches for jungle";
|
|
||||||
listid = "~rodarima/jungle.lists.sr.ht";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# We need access to the network for the watch service, as we will fetch the
|
|
||||||
# emails directly from the IMAP server.
|
|
||||||
systemd.services.public-inbox-watch.serviceConfig = {
|
|
||||||
PrivateNetwork = lib.mkForce false;
|
|
||||||
RestrictAddressFamilies = lib.mkForce [ "AF_UNIX" "AF_INET" "AF_INET6" ];
|
|
||||||
KillSignal = "SIGKILL"; # Avoid slow shutdown
|
|
||||||
|
|
||||||
# Required for chmod(..., 02750) on directories by git, from
|
|
||||||
# systemd.exec(8):
|
|
||||||
# > Note that this restricts marking of any type of file system object with
|
|
||||||
# > these bits, including both regular files and directories (where the SGID
|
|
||||||
# > is a different meaning than for files, see documentation).
|
|
||||||
RestrictSUIDSGID = lib.mkForce false;
|
|
||||||
};
|
|
||||||
}
|
|
11
m/hut/secrets.nix
Normal file
11
m/hut/secrets.nix
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
let
|
||||||
|
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||||
|
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb";
|
||||||
|
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1";
|
||||||
|
default = [ rarias root hut ];
|
||||||
|
in
|
||||||
|
{
|
||||||
|
"secrets/ovni-token.age".publicKeys = default;
|
||||||
|
"secrets/nosv-token.age".publicKeys = default;
|
||||||
|
"secrets/ceph-user.age".publicKeys = default;
|
||||||
|
}
|
11
m/hut/secrets/ceph-user.age
Normal file
11
m/hut/secrets/ceph-user.age
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 CAWG4Q 35Ak+Mep9k5KnDLF1ywDbMD4l4mRFg6D0et19tqXxAw
|
||||||
|
Wgr+CX4rzrPmUszSidtLAVSvgD80F2dqtd92hGZIFwo
|
||||||
|
-> ssh-ed25519 MSF3dg OVFvpkAyWTowtxsafstX31H/hJpNZmnOCbvqMIN0+AQ
|
||||||
|
VxjRcQmp+BadEh2y0PB96EeizIl3tTQpVu0CWHmsc1s
|
||||||
|
-> ssh-ed25519 HY2yRg MJSQIpre9m0XnojgXuKQ/+hVBZNrZNGZqplwhqicpjI
|
||||||
|
CLkE52iqpoqSnbzisNjQgxTfNqKeaRl5ntcw1d+ZDyQ
|
||||||
|
-> m$8`De%~-grease '85p}`by
|
||||||
|
52zMpprONcawWDDtzHdWNwFoYXErPUnVjhSONbUBpDlqAmJmD1LcAnsU
|
||||||
|
--- 0vZOPyXQIMMGTwgFfvm8Sn8O7vjrsjGUEy5m/BASCyc
|
||||||
|
È| üœ)‡<>ËëË*_ËDóUS`<06><>‹àŠèr Âs<C382>¢NªÈ[ÖŒ^e+A1œ“G.í#âù°m˜¸Wß ’5·àƒµ(
|
BIN
m/hut/secrets/nosv-token.age
Normal file
BIN
m/hut/secrets/nosv-token.age
Normal file
Binary file not shown.
BIN
m/hut/secrets/ovni-token.age
Normal file
BIN
m/hut/secrets/ovni-token.age
Normal file
Binary file not shown.
11
m/hut/slurm-daemon.nix
Normal file
11
m/hut/slurm-daemon.nix
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
services.slurm = {
|
||||||
|
server.enable = true;
|
||||||
|
partitionName = [
|
||||||
|
"owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP"
|
||||||
|
"all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
@ -1,7 +0,0 @@
|
|||||||
{ ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
services.slurm = {
|
|
||||||
server.enable = true;
|
|
||||||
};
|
|
||||||
}
|
|
@ -7,9 +7,5 @@
|
|||||||
- 10.0.40.106
|
- 10.0.40.106
|
||||||
- 10.0.40.107
|
- 10.0.40.107
|
||||||
- 10.0.40.108
|
- 10.0.40.108
|
||||||
# Storage
|
|
||||||
- 10.0.40.141
|
|
||||||
- 10.0.40.142
|
|
||||||
- 10.0.40.143
|
|
||||||
labels:
|
labels:
|
||||||
job: ipmi-lan
|
job: ipmi-lan
|
||||||
|
@ -2,11 +2,13 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/main.nix
|
||||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||||
|
|
||||||
../eudy/cpufreq.nix
|
../eudy/cpufreq.nix
|
||||||
../eudy/users.nix
|
../eudy/users.nix
|
||||||
|
../eudy/slurm.nix
|
||||||
|
./users.nix
|
||||||
./kernel.nix
|
./kernel.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -1,29 +1,9 @@
|
|||||||
{ pkgs, lib, ... }:
|
{ pkgs, lib, ... }:
|
||||||
|
|
||||||
let
|
let
|
||||||
#fcs-devel = pkgs.linuxPackages_custom {
|
kernel = nixos-fcsv4;
|
||||||
# version = "6.2.8";
|
|
||||||
# src = /mnt/data/kernel/fcs/kernel/src;
|
|
||||||
# configfile = /mnt/data/kernel/fcs/kernel/configs/defconfig;
|
|
||||||
#};
|
|
||||||
|
|
||||||
#fcsv1 = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" false;
|
nixos-fcs-kernel = {gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
||||||
#fcsv2 = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" false;
|
|
||||||
#fcsv1-lockdep = fcs-kernel "bc11660676d3d68ce2459b9fb5d5e654e3f413be" true;
|
|
||||||
#fcsv2-lockdep = fcs-kernel "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1" true;
|
|
||||||
#fcs-kernel = gitCommit: lockdep: pkgs.linuxPackages_custom {
|
|
||||||
# version = "6.2.8";
|
|
||||||
# src = builtins.fetchGit {
|
|
||||||
# url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
|
||||||
# rev = gitCommit;
|
|
||||||
# ref = "fcs";
|
|
||||||
# };
|
|
||||||
# configfile = if lockdep then ./configs/lockdep else ./configs/defconfig;
|
|
||||||
#};
|
|
||||||
|
|
||||||
kernel = nixos-fcs;
|
|
||||||
|
|
||||||
nixos-fcs-kernel = lib.makeOverridable ({gitCommit, lockStat ? false, preempt ? false, branch ? "fcs"}: pkgs.linuxPackagesFor (pkgs.buildLinux rec {
|
|
||||||
version = "6.2.8";
|
version = "6.2.8";
|
||||||
src = builtins.fetchGit {
|
src = builtins.fetchGit {
|
||||||
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
url = "git@bscpm03.bsc.es:ompss-kernel/linux.git";
|
||||||
@ -40,13 +20,27 @@ let
|
|||||||
};
|
};
|
||||||
kernelPatches = [];
|
kernelPatches = [];
|
||||||
extraMeta.branch = lib.versions.majorMinor version;
|
extraMeta.branch = lib.versions.majorMinor version;
|
||||||
}));
|
});
|
||||||
|
|
||||||
nixos-fcs = nixos-fcs-kernel {gitCommit = "8a09822dfcc8f0626b209d6d2aec8b5da459dfee";};
|
nixos-fcsv1 = nixos-fcs-kernel {gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";};
|
||||||
nixos-fcs-lockstat = nixos-fcs.override {
|
nixos-fcsv2 = nixos-fcs-kernel {gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";};
|
||||||
|
nixos-fcsv3 = nixos-fcs-kernel {gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";};
|
||||||
|
nixos-fcsv4 = nixos-fcs-kernel {gitCommit = "c94c3d946f33ac3e5782a02ee002cc1164c0cb4f";};
|
||||||
|
|
||||||
|
nixos-fcsv1-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "bc11660676d3d68ce2459b9fb5d5e654e3f413be";
|
||||||
lockStat = true;
|
lockStat = true;
|
||||||
};
|
};
|
||||||
nixos-fcs-lockstat-preempt = nixos-fcs.override {
|
nixos-fcsv2-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "db0f2eca0cd57a58bf456d7d2c7d5d8fdb25dfb1";
|
||||||
|
lockStat = true;
|
||||||
|
};
|
||||||
|
nixos-fcsv3-lockstat = nixos-fcs-kernel {
|
||||||
|
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
|
||||||
|
lockStat = true;
|
||||||
|
};
|
||||||
|
nixos-fcsv3-lockstat-preempt = nixos-fcs-kernel {
|
||||||
|
gitCommit = "6c17394890704c3345ac1a521bb547164b36b154";
|
||||||
lockStat = true;
|
lockStat = true;
|
||||||
preempt = true;
|
preempt = true;
|
||||||
};
|
};
|
||||||
@ -66,5 +60,5 @@ in {
|
|||||||
|
|
||||||
# enable memory overcommit, needed to build a taglibc system using nix after
|
# enable memory overcommit, needed to build a taglibc system using nix after
|
||||||
# increasing the openblas memory footprint
|
# increasing the openblas memory footprint
|
||||||
boot.kernel.sysctl."vm.overcommit_memory" = 1;
|
boot.kernel.sysctl."vm.overcommit_memory" = lib.mkForce 1;
|
||||||
}
|
}
|
||||||
|
17
m/koro/users.nix
Normal file
17
m/koro/users.nix
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
users.users = {
|
||||||
|
vlopez = {
|
||||||
|
uid = 4334;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/vlopez";
|
||||||
|
description = "Victor López";
|
||||||
|
group = "Computational";
|
||||||
|
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
@ -2,27 +2,28 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/main.nix
|
||||||
../module/monitoring.nix
|
../common/monitoring.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
||||||
|
|
||||||
boot.kernel.sysctl = {
|
|
||||||
"kernel.yama.ptrace_scope" = lib.mkForce "1";
|
|
||||||
};
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
ceph
|
ceph
|
||||||
];
|
];
|
||||||
|
|
||||||
|
services.slurm = {
|
||||||
|
client.enable = lib.mkForce false;
|
||||||
|
};
|
||||||
|
|
||||||
services.ceph = {
|
services.ceph = {
|
||||||
enable = true;
|
enable = true;
|
||||||
global = {
|
global = {
|
||||||
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b";
|
||||||
monHost = "10.0.40.40";
|
monHost = "10.0.42.40";
|
||||||
monInitialMembers = "bay";
|
monInitialMembers = "10.0.42.40";
|
||||||
clusterNetwork = "10.0.40.40/24"; # Use Ethernet only
|
publicNetwork = "10.0.42.40/24";
|
||||||
|
clusterNetwork = "10.0.42.40/24";
|
||||||
};
|
};
|
||||||
osd = {
|
osd = {
|
||||||
enable = true;
|
enable = true;
|
||||||
@ -49,16 +50,6 @@
|
|||||||
address = "10.0.42.42";
|
address = "10.0.42.42";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
firewall = {
|
|
||||||
extraCommands = ''
|
|
||||||
# Accept all incoming TCP traffic from bay
|
|
||||||
iptables -A nixos-fw -p tcp -s bay -j nixos-fw-accept
|
|
||||||
# Accept monitoring requests from hut
|
|
||||||
iptables -A nixos-fw -p tcp -s hut --dport 9002 -j nixos-fw-accept
|
|
||||||
# Accept all Ceph traffic from the local network
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 -m multiport --dport 3300,6789,6800:7568 -j nixos-fw-accept
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
# Missing service for volumes, see:
|
# Missing service for volumes, see:
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
{
|
|
||||||
services.nixseparatedebuginfod.enable = true;
|
|
||||||
}
|
|
@ -1,3 +0,0 @@
|
|||||||
{
|
|
||||||
boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
|
|
||||||
}
|
|
@ -1,24 +0,0 @@
|
|||||||
{ config, lib, ... }:
|
|
||||||
|
|
||||||
with lib;
|
|
||||||
|
|
||||||
{
|
|
||||||
options = {
|
|
||||||
users.jungleUsers = mkOption {
|
|
||||||
type = types.attrsOf (types.anything // { check = (x: x ? "hosts"); });
|
|
||||||
description = ''
|
|
||||||
Same as users.users but with the extra `hosts` attribute, which controls
|
|
||||||
access to the nodes by `networking.hostName`.
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
config = let
|
|
||||||
allowedUser = host: userConf: builtins.elem host userConf.hosts;
|
|
||||||
filterUsers = host: users: filterAttrs (n: v: allowedUser host v) users;
|
|
||||||
removeHosts = users: mapAttrs (n: v: builtins.removeAttrs v [ "hosts" ]) users;
|
|
||||||
currentHost = config.networking.hostName;
|
|
||||||
in {
|
|
||||||
users.users = removeHosts (filterUsers currentHost config.users.jungleUsers);
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,107 +0,0 @@
|
|||||||
{ config, pkgs, lib, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
suspendProgram = pkgs.writeScript "suspend.sh" ''
|
|
||||||
#!/usr/bin/env bash
|
|
||||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
|
||||||
set -x
|
|
||||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
|
||||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
|
||||||
hosts=$(scontrol show hostnames $1)
|
|
||||||
for host in $hosts; do
|
|
||||||
echo Shutting down host: $host
|
|
||||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
|
|
||||||
resumeProgram = pkgs.writeScript "resume.sh" ''
|
|
||||||
#!/usr/bin/env bash
|
|
||||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
|
||||||
set -x
|
|
||||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
|
||||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
|
||||||
hosts=$(scontrol show hostnames $1)
|
|
||||||
for host in $hosts; do
|
|
||||||
echo Starting host: $host
|
|
||||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
|
|
||||||
in {
|
|
||||||
systemd.services.slurmd.serviceConfig = {
|
|
||||||
# Kill all processes in the control group on stop/restart. This will kill
|
|
||||||
# all the jobs running, so ensure that we only upgrade when the nodes are
|
|
||||||
# not in use. See:
|
|
||||||
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
|
||||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
|
||||||
KillMode = lib.mkForce "control-group";
|
|
||||||
};
|
|
||||||
|
|
||||||
services.slurm = {
|
|
||||||
client.enable = true;
|
|
||||||
controlMachine = "hut";
|
|
||||||
clusterName = "jungle";
|
|
||||||
nodeName = [
|
|
||||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
|
||||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
|
||||||
];
|
|
||||||
|
|
||||||
partitionName = [
|
|
||||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
|
||||||
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
|
||||||
];
|
|
||||||
|
|
||||||
# See slurm.conf(5) for more details about these options.
|
|
||||||
extraConfig = ''
|
|
||||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
|
||||||
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
|
||||||
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
|
||||||
# library in SLURM (--mpi=pmix). See more details here:
|
|
||||||
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
|
||||||
MpiDefault=pmix
|
|
||||||
|
|
||||||
# When a node reboots return that node to the slurm queue as soon as it
|
|
||||||
# becomes operative again.
|
|
||||||
ReturnToService=2
|
|
||||||
|
|
||||||
# Track all processes by using a cgroup
|
|
||||||
ProctrackType=proctrack/cgroup
|
|
||||||
|
|
||||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
|
||||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
|
||||||
TaskPlugin=task/affinity,task/cgroup
|
|
||||||
|
|
||||||
# Power off unused nodes until they are requested
|
|
||||||
SuspendProgram=${suspendProgram}
|
|
||||||
SuspendTimeout=60
|
|
||||||
ResumeProgram=${resumeProgram}
|
|
||||||
ResumeTimeout=300
|
|
||||||
SuspendExcNodes=hut
|
|
||||||
|
|
||||||
# Turn the nodes off after 1 hour of inactivity
|
|
||||||
SuspendTime=3600
|
|
||||||
|
|
||||||
# Reduce port range so we can allow only this range in the firewall
|
|
||||||
SrunPortRange=60000-61000
|
|
||||||
|
|
||||||
# Use cores as consumable resources. In SLURM terms, a core may have
|
|
||||||
# multiple hardware threads (or CPUs).
|
|
||||||
SelectType=select/cons_tres
|
|
||||||
|
|
||||||
# Ignore memory constraints and only use unused cores to share a node with
|
|
||||||
# other jobs.
|
|
||||||
SelectTypeParameters=CR_Core
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
age.secrets.mungeKey = {
|
|
||||||
file = ../../secrets/munge-key.age;
|
|
||||||
owner = "munge";
|
|
||||||
group = "munge";
|
|
||||||
};
|
|
||||||
|
|
||||||
services.munge = {
|
|
||||||
enable = true;
|
|
||||||
password = config.age.secrets.mungeKey.path;
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,28 +0,0 @@
|
|||||||
{ config, lib, pkgs, ... }:
|
|
||||||
|
|
||||||
# See also: https://github.com/NixOS/nixpkgs/pull/112010
|
|
||||||
# And: https://github.com/NixOS/nixpkgs/pull/115839
|
|
||||||
|
|
||||||
with lib;
|
|
||||||
|
|
||||||
{
|
|
||||||
systemd.services."prometheus-slurm-exporter" = {
|
|
||||||
wantedBy = [ "multi-user.target" ];
|
|
||||||
after = [ "network.target" ];
|
|
||||||
serviceConfig = {
|
|
||||||
Restart = mkDefault "always";
|
|
||||||
PrivateTmp = mkDefault true;
|
|
||||||
WorkingDirectory = mkDefault "/tmp";
|
|
||||||
DynamicUser = mkDefault true;
|
|
||||||
ExecStart = ''
|
|
||||||
${pkgs.prometheus-slurm-exporter}/bin/prometheus-slurm-exporter --listen-address "127.0.0.1:9341"
|
|
||||||
'';
|
|
||||||
Environment = [
|
|
||||||
"PATH=${pkgs.slurm}/bin"
|
|
||||||
# We need to specify the slurm config to be able to talk to the slurmd
|
|
||||||
# daemon.
|
|
||||||
"SLURM_CONF=${config.services.slurm.etcSlurm}/slurm.conf"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,8 +0,0 @@
|
|||||||
{ ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
networking.firewall = {
|
|
||||||
# Required for PMIx in SLURM, we should find a better way
|
|
||||||
allowedTCPPortRanges = [ { from=1024; to=65535; } ];
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,19 +0,0 @@
|
|||||||
{ ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
# Mount the hut nix store via NFS
|
|
||||||
fileSystems."/mnt/hut-nix-store" = {
|
|
||||||
device = "hut:/nix/store";
|
|
||||||
fsType = "nfs";
|
|
||||||
options = [ "ro" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
systemd.services.slurmd.serviceConfig = {
|
|
||||||
# When running a job, bind the hut store in /nix/store so the paths are
|
|
||||||
# available too.
|
|
||||||
# FIXME: This doesn't keep the programs in /run/current-system/sw/bin
|
|
||||||
# available in the store. Ideally they should be merged but the overlay FS
|
|
||||||
# doesn't work when the underlying directories change.
|
|
||||||
BindReadOnlyPaths = "/mnt/hut-nix-store:/nix/store";
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,14 +1,7 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [ ../common/main.nix ];
|
||||||
../common/xeon.nix
|
|
||||||
../module/ceph.nix
|
|
||||||
../module/emulation.nix
|
|
||||||
../module/slurm-client.nix
|
|
||||||
../module/slurm-firewall.nix
|
|
||||||
../module/debuginfod.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
# Select this disk using its ID to avoid mismatches
|
# Select this disk using its ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c";
|
||||||
|
@ -1,17 +1,15 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, modulesPath, lib, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||||
../module/ceph.nix
|
../common/main.nix
|
||||||
../module/emulation.nix
|
|
||||||
../module/slurm-client.nix
|
|
||||||
../module/slurm-firewall.nix
|
|
||||||
../module/debuginfod.nix
|
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select this disk using its ID to avoid mismatches
|
# Select this disk using its ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629";
|
||||||
|
#programs.ssh.forwardX11 = false;
|
||||||
|
#programs.ssh.setXAuthLocation = lib.mkForce true;
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
hostName = "owl2";
|
hostName = "owl2";
|
||||||
@ -19,7 +17,6 @@
|
|||||||
address = "10.0.40.2";
|
address = "10.0.40.2";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
# Watch out! The OmniPath device is not in the same place here:
|
|
||||||
interfaces.ibp129s0.ipv4.addresses = [ {
|
interfaces.ibp129s0.ipv4.addresses = [ {
|
||||||
address = "10.0.42.2";
|
address = "10.0.42.2";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
|
@ -1,64 +0,0 @@
|
|||||||
{ config, pkgs, lib, modulesPath, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
../common/base.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
# Don't install Grub on the disk yet
|
|
||||||
boot.loader.grub.device = "nodev";
|
|
||||||
|
|
||||||
# Enable serial console
|
|
||||||
boot.kernelParams = [
|
|
||||||
"console=tty1"
|
|
||||||
"console=ttyS1,115200"
|
|
||||||
];
|
|
||||||
|
|
||||||
networking = {
|
|
||||||
hostName = "raccoon";
|
|
||||||
# Only BSC DNSs seem to be reachable from the office VLAN
|
|
||||||
nameservers = [ "84.88.52.35" "84.88.52.36" ];
|
|
||||||
defaultGateway = "84.88.51.129";
|
|
||||||
interfaces.eno0.ipv4.addresses = [ {
|
|
||||||
address = "84.88.51.152";
|
|
||||||
prefixLength = 25;
|
|
||||||
} ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Configure Nvidia driver to use with CUDA
|
|
||||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
|
||||||
hardware.graphics.enable = true;
|
|
||||||
nixpkgs.config.allowUnfree = true;
|
|
||||||
nixpkgs.config.nvidia.acceptLicense = true;
|
|
||||||
services.xserver.videoDrivers = [ "nvidia" ];
|
|
||||||
|
|
||||||
users.motd = ''
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⢰⠇⡀⠀⠙⠻⡿⣦⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⡎⢰⣧⠀⠀⠀⠁⠈⠛⢿⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣠⣴⡦⠶⠟⠓⠚⠻⡄⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⣧⠀⣱⣀⣰⣧⠀⢀⠀⣘⣿⣿⣦⣶⣄⣠⡀⠀⠀⣀⣀⣤⣴⣄⣀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠿⠏⠁⠀⣀⣠⣶⣿⡶⣿⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⣹⣆⠘⣿⣿⣿⣇⢸⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣾⣿⣿⣿⣿⣿⣿⣿⣿⣶⣶⣦⡀⣀⣤⣠⣤⡾⠋⠀⢀⣤⣶⣿⣿⣿⣿⣿⣿⣿⡀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠘⢿⡄⢼⣿⣿⣿⣿⣿⡟⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣵⣾⡾⠙⣋⣩⣽⣿⣿⣿⣿⢋⡼⠁
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠈⢻⣄⠸⢿⣿⣿⠿⠷⠀⠈⠀⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣾⣿⣿⣿⣿⣿⣿⠇⡼⠁⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⢾⣯⡀⠀⢼⡿⠀⠀⠀⢼⠿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⣿⡿⣿⣿⣿⠿⣿⣯⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⢋⡼⠁⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢻⡏⠠⣦⠁⠀⠀⠀⠀⠀⠟⠛⠛⣿⣿⣿⣿⣿⠿⠁⠀⠁⢿⠙⠁⠀⠛⠹⣿⣏⣾⣿⣿⣿⣿⣿⣿⣿⣿⠿⠃⣹⠁⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣘⣧⠀⠙⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⡿⡿⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⢹⣿⠿⢿⣿⣿⣿⣿⣿⠋⢀⡤⠛⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢹⡯⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣿⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠁⠀⢸⣿⣿⣿⠛⠉⠀⣰⠷⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠇⠀⠀⠀⠀⠀⢀⣿⡇⠀⠀⢻⣿⣿⠁⠀⠀⢠⣾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⠟⢿⣿⣄⡀⢸⣿⡀⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⠀⠀⠀⢰⣿⣿⡛⣿⣿⡄⢠⡺⠿⡍⠁⢀⣤⣿⣿⣿⠿⣷⣮⣉⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⠀⠀⠈⣧⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⢾⠉⠃⠀⣴⣿⣟⠻⣿⣿⣿⡇⢸⣿⣶⠀⢀⣾⣿⣿⣟⠿⣷⣾⣿⣿⣿⣿⣦⣤⣤⡤⠀⠀⠀⠀⠀⠁⠀⠀⠀⣼⠗⠀⠀⠀⠀
|
|
||||||
⠀⠀⠐⢄⡀⠀⠀⠀⢘⡀⠀⢶⣾⣿⣿⣿⣿⡿⠋⠁⠈⠻⠉⠀⠚⠻⣿⣿⣿⣶⣾⣿⣿⣿⣿⣿⣿⣷⣬⣤⣶⣦⡀⣾⣶⣇⠀⠀⠈⢉⣷⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠈⠓⠶⢦⡽⠄⣈⣿⣿⣿⣿⣿⠏⠀⠀⠀⠀⠀⠀⠀⠀⠀⠹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡓⠙⣿⡟⠀⠀⠀⠈⠛⣷⣶⡄⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⢀⣬⠆⢠⣍⣛⠻⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣉⣀⡀⠀⠀⠈⠛⢿⣦⡀
|
|
||||||
⠐⠒⠒⠶⠶⠶⢦⣬⣟⣥⣀⡉⠛⠻⠶⢁⣤⣾⣿⣿⣿⣷⡄⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣟⡛⠿⠭⠭⠭⠭⠭⠿⠿⠿⢿⣿⣟⠃⠀⠀⠀⠹⣟⠓
|
|
||||||
⠀⣀⣠⠤⠤⢤⣤⣾⣤⡄⣉⣉⣙⣓⡂⣿⣿⣭⣹⣿⣿⣿⣿⡰⣂⣀⢀⠀⠻⣿⠛⠻⠟⠡⣶⣾⣿⣿⣿⣿⣿⣿⣿⡖⠒⠒⠒⠛⠷⢤⡀⢰⣴⣿⡆
|
|
||||||
⠀⠀⠀⢀⣠⡴⠾⠟⠻⣟⡉⠉⠉⠉⢁⢿⣿⣿⣿⣿⣿⣿⡿⣱⣿⣭⡌⠤⠀⠀⠐⣶⣌⡻⣶⣭⡻⢿⣿⣿⣿⣿⣿⣯⣥⣤⣦⠀⠠⣴⣶⣶⣿⡟⢿
|
|
||||||
⢀⠔⠊⠉⠀⠀⠀⠀⢸⣯⣤⠀⠀⠠⣼⣮⣟⣿⣿⣿⣻⣭⣾⣿⣿⣷⣶⣦⠶⣚⣾⣿⣿⣷⣜⣿⣿⣶⣝⢿⣿⣿⣿⣿⣷⣦⣄⣰⡄⠈⢿⣿⡿⣇⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠈⢡⢇⠀⠀⣠⣿⣿⣿⣯⣟⣛⣛⣛⣛⣛⣩⣭⣴⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣻⣿⣧⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠏⠀⢹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣦⣍⣿⣿⣿⣿⡄⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣾⡁⢈⣾⣿⡿⠛⣛⣿⣿⣿⣿ DO YOU BRING FEEDS? ⣿⣿⣿⣿⣿⣿⡏⠈⠙⠈⠁⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⡿⠛⠉⣽⣿⣷⣾⡿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠌⠛⠉⠀⠁⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠹⠋⠀⢻⣿⣿⣿⣿⠿⢿⣿⣿⣿⣿⣿⣿⠿⣿⣿⣿⣿⠿⠛⠋⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
|
||||||
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠉⠁⠀⠀⠀⠀⠀⠈⠉⠉⠀⠀⠈⠋⠉⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
|
||||||
'';
|
|
||||||
}
|
|
405
pkgs/ceph.nix
Normal file
405
pkgs/ceph.nix
Normal file
@ -0,0 +1,405 @@
|
|||||||
|
{ lib
|
||||||
|
, stdenv
|
||||||
|
, runCommand
|
||||||
|
, fetchurl
|
||||||
|
, fetchFromGitHub
|
||||||
|
, fetchPypi
|
||||||
|
|
||||||
|
# Build time
|
||||||
|
, cmake
|
||||||
|
, ensureNewerSourcesHook
|
||||||
|
, fmt
|
||||||
|
, git
|
||||||
|
, makeWrapper
|
||||||
|
, nasm
|
||||||
|
, pkg-config
|
||||||
|
, which
|
||||||
|
|
||||||
|
# Tests
|
||||||
|
, nixosTests
|
||||||
|
|
||||||
|
# Runtime dependencies
|
||||||
|
, arrow-cpp
|
||||||
|
, babeltrace
|
||||||
|
, boost179
|
||||||
|
, bzip2
|
||||||
|
, cryptsetup
|
||||||
|
, cunit
|
||||||
|
, doxygen
|
||||||
|
, gperf
|
||||||
|
, graphviz
|
||||||
|
, gtest
|
||||||
|
, icu
|
||||||
|
, libcap
|
||||||
|
, libcap_ng
|
||||||
|
, libnl
|
||||||
|
, libxml2
|
||||||
|
, lttng-ust
|
||||||
|
, lua
|
||||||
|
, lz4
|
||||||
|
, oath-toolkit
|
||||||
|
, openldap
|
||||||
|
, python310
|
||||||
|
, rdkafka
|
||||||
|
, rocksdb
|
||||||
|
, snappy
|
||||||
|
, sqlite
|
||||||
|
, utf8proc
|
||||||
|
, zlib
|
||||||
|
, zstd
|
||||||
|
|
||||||
|
# Optional Dependencies
|
||||||
|
, curl ? null
|
||||||
|
, expat ? null
|
||||||
|
, fuse ? null
|
||||||
|
, libatomic_ops ? null
|
||||||
|
, libedit ? null
|
||||||
|
, libs3 ? null
|
||||||
|
, yasm ? null
|
||||||
|
|
||||||
|
# Mallocs
|
||||||
|
, gperftools ? null
|
||||||
|
, jemalloc ? null
|
||||||
|
|
||||||
|
# Crypto Dependencies
|
||||||
|
, cryptopp ? null
|
||||||
|
, nspr ? null
|
||||||
|
, nss ? null
|
||||||
|
|
||||||
|
# Linux Only Dependencies
|
||||||
|
, linuxHeaders
|
||||||
|
, util-linux
|
||||||
|
, libuuid
|
||||||
|
, udev
|
||||||
|
, keyutils
|
||||||
|
, rdma-core
|
||||||
|
, rabbitmq-c
|
||||||
|
, libaio ? null
|
||||||
|
, libxfs ? null
|
||||||
|
, liburing ? null
|
||||||
|
, zfs ? null
|
||||||
|
, ...
|
||||||
|
}:
|
||||||
|
|
||||||
|
# We must have one crypto library
|
||||||
|
assert cryptopp != null || (nss != null && nspr != null);
|
||||||
|
|
||||||
|
let
|
||||||
|
shouldUsePkg = pkg: if pkg != null && pkg.meta.available then pkg else null;
|
||||||
|
|
||||||
|
optYasm = shouldUsePkg yasm;
|
||||||
|
optExpat = shouldUsePkg expat;
|
||||||
|
optCurl = shouldUsePkg curl;
|
||||||
|
optFuse = shouldUsePkg fuse;
|
||||||
|
optLibedit = shouldUsePkg libedit;
|
||||||
|
optLibatomic_ops = shouldUsePkg libatomic_ops;
|
||||||
|
optLibs3 = shouldUsePkg libs3;
|
||||||
|
|
||||||
|
optJemalloc = shouldUsePkg jemalloc;
|
||||||
|
optGperftools = shouldUsePkg gperftools;
|
||||||
|
|
||||||
|
optCryptopp = shouldUsePkg cryptopp;
|
||||||
|
optNss = shouldUsePkg nss;
|
||||||
|
optNspr = shouldUsePkg nspr;
|
||||||
|
|
||||||
|
optLibaio = shouldUsePkg libaio;
|
||||||
|
optLibxfs = shouldUsePkg libxfs;
|
||||||
|
optZfs = shouldUsePkg zfs;
|
||||||
|
|
||||||
|
# Downgrade rocksdb, 7.10 breaks ceph
|
||||||
|
rocksdb' = rocksdb.overrideAttrs {
|
||||||
|
version = "7.9.2";
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "facebook";
|
||||||
|
repo = "rocksdb";
|
||||||
|
rev = "refs/tags/v7.9.2";
|
||||||
|
hash = "sha256-5P7IqJ14EZzDkbjaBvbix04ceGGdlWBuVFH/5dpD5VM=";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
hasRadosgw = optExpat != null && optCurl != null && optLibedit != null;
|
||||||
|
|
||||||
|
# Malloc implementation (can be jemalloc, tcmalloc or null)
|
||||||
|
malloc = if optJemalloc != null then optJemalloc else optGperftools;
|
||||||
|
|
||||||
|
# We prefer nss over cryptopp
|
||||||
|
cryptoStr = if optNss != null && optNspr != null then "nss" else
|
||||||
|
if optCryptopp != null then "cryptopp" else "none";
|
||||||
|
|
||||||
|
cryptoLibsMap = {
|
||||||
|
nss = [ optNss optNspr ];
|
||||||
|
cryptopp = [ optCryptopp ];
|
||||||
|
none = [ ];
|
||||||
|
};
|
||||||
|
|
||||||
|
getMeta = description: with lib; {
|
||||||
|
homepage = "https://ceph.io/en/";
|
||||||
|
inherit description;
|
||||||
|
license = with licenses; [ lgpl21 gpl2 bsd3 mit publicDomain ];
|
||||||
|
maintainers = with maintainers; [ adev ak johanot krav ];
|
||||||
|
platforms = [ "x86_64-linux" "aarch64-linux" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
ceph-common = with python.pkgs; buildPythonPackage {
|
||||||
|
pname = "ceph-common";
|
||||||
|
inherit src version;
|
||||||
|
|
||||||
|
sourceRoot = "ceph-${version}/src/python-common";
|
||||||
|
|
||||||
|
propagatedBuildInputs = [
|
||||||
|
pyyaml
|
||||||
|
];
|
||||||
|
|
||||||
|
nativeCheckInputs = [
|
||||||
|
pytestCheckHook
|
||||||
|
];
|
||||||
|
|
||||||
|
disabledTests = [
|
||||||
|
# requires network access
|
||||||
|
"test_valid_addr"
|
||||||
|
];
|
||||||
|
|
||||||
|
meta = getMeta "Ceph common module for code shared by manager modules";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Watch out for python <> boost compatibility
|
||||||
|
python = python310.override {
|
||||||
|
packageOverrides = self: super: {
|
||||||
|
sqlalchemy = super.sqlalchemy.overridePythonAttrs rec {
|
||||||
|
version = "1.4.46";
|
||||||
|
src = fetchPypi {
|
||||||
|
pname = "SQLAlchemy";
|
||||||
|
inherit version;
|
||||||
|
hash = "sha256-aRO4JH2KKS74MVFipRkx4rQM6RaB8bbxj2lwRSAMSjA=";
|
||||||
|
};
|
||||||
|
disabledTestPaths = [
|
||||||
|
"test/aaa_profiling"
|
||||||
|
"test/ext/mypy"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
boost = boost179.override {
|
||||||
|
enablePython = true;
|
||||||
|
inherit python;
|
||||||
|
};
|
||||||
|
|
||||||
|
# TODO: split this off in build and runtime environment
|
||||||
|
ceph-python-env = python.withPackages (ps: with ps; [
|
||||||
|
ceph-common
|
||||||
|
|
||||||
|
# build time
|
||||||
|
cython
|
||||||
|
|
||||||
|
# debian/control
|
||||||
|
bcrypt
|
||||||
|
cherrypy
|
||||||
|
influxdb
|
||||||
|
jinja2
|
||||||
|
kubernetes
|
||||||
|
natsort
|
||||||
|
numpy
|
||||||
|
pecan
|
||||||
|
prettytable
|
||||||
|
pyjwt
|
||||||
|
pyopenssl
|
||||||
|
python-dateutil
|
||||||
|
pyyaml
|
||||||
|
requests
|
||||||
|
routes
|
||||||
|
scikit-learn
|
||||||
|
scipy
|
||||||
|
setuptools
|
||||||
|
sphinx
|
||||||
|
virtualenv
|
||||||
|
werkzeug
|
||||||
|
|
||||||
|
# src/pybind/mgr/requirements-required.txt
|
||||||
|
cryptography
|
||||||
|
jsonpatch
|
||||||
|
|
||||||
|
# src/tools/cephfs/shell/setup.py
|
||||||
|
cmd2
|
||||||
|
colorama
|
||||||
|
]);
|
||||||
|
inherit (ceph-python-env.python) sitePackages;
|
||||||
|
|
||||||
|
version = "18.2.0";
|
||||||
|
src = fetchurl {
|
||||||
|
url = "https://download.ceph.com/tarballs/ceph-${version}.tar.gz";
|
||||||
|
hash = "sha256:0k9nl6xi5brva51rr14m7ig27mmmd7vrpchcmqc40q3c2khn6ns9";
|
||||||
|
};
|
||||||
|
in rec {
|
||||||
|
ceph = stdenv.mkDerivation {
|
||||||
|
pname = "ceph";
|
||||||
|
inherit src version;
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
fmt
|
||||||
|
git
|
||||||
|
makeWrapper
|
||||||
|
nasm
|
||||||
|
pkg-config
|
||||||
|
python
|
||||||
|
python.pkgs.python # for the toPythonPath function
|
||||||
|
python.pkgs.wrapPython
|
||||||
|
which
|
||||||
|
(ensureNewerSourcesHook { year = "1980"; })
|
||||||
|
# for building docs/man-pages presumably
|
||||||
|
doxygen
|
||||||
|
graphviz
|
||||||
|
];
|
||||||
|
|
||||||
|
enableParallelBuilding = true;
|
||||||
|
|
||||||
|
buildInputs = cryptoLibsMap.${cryptoStr} ++ [
|
||||||
|
arrow-cpp
|
||||||
|
babeltrace
|
||||||
|
boost
|
||||||
|
bzip2
|
||||||
|
ceph-python-env
|
||||||
|
cryptsetup
|
||||||
|
cunit
|
||||||
|
gperf
|
||||||
|
gtest
|
||||||
|
icu
|
||||||
|
libcap
|
||||||
|
libnl
|
||||||
|
libxml2
|
||||||
|
lttng-ust
|
||||||
|
lua
|
||||||
|
lz4
|
||||||
|
malloc
|
||||||
|
oath-toolkit
|
||||||
|
openldap
|
||||||
|
optLibatomic_ops
|
||||||
|
optLibs3
|
||||||
|
optYasm
|
||||||
|
rdkafka
|
||||||
|
rocksdb'
|
||||||
|
snappy
|
||||||
|
sqlite
|
||||||
|
utf8proc
|
||||||
|
zlib
|
||||||
|
zstd
|
||||||
|
] ++ lib.optionals stdenv.isLinux [
|
||||||
|
keyutils
|
||||||
|
libcap_ng
|
||||||
|
liburing
|
||||||
|
libuuid
|
||||||
|
linuxHeaders
|
||||||
|
optLibaio
|
||||||
|
optLibxfs
|
||||||
|
optZfs
|
||||||
|
rabbitmq-c
|
||||||
|
rdma-core
|
||||||
|
udev
|
||||||
|
util-linux
|
||||||
|
] ++ lib.optionals hasRadosgw [
|
||||||
|
optCurl
|
||||||
|
optExpat
|
||||||
|
optFuse
|
||||||
|
optLibedit
|
||||||
|
];
|
||||||
|
|
||||||
|
pythonPath = [ ceph-python-env "${placeholder "out"}/${ceph-python-env.sitePackages}" ];
|
||||||
|
|
||||||
|
preConfigure =''
|
||||||
|
substituteInPlace src/common/module.c --replace "/sbin/modinfo" "modinfo"
|
||||||
|
substituteInPlace src/common/module.c --replace "/sbin/modprobe" "modprobe"
|
||||||
|
substituteInPlace src/common/module.c --replace "/bin/grep" "grep"
|
||||||
|
|
||||||
|
# install target needs to be in PYTHONPATH for "*.pth support" check to succeed
|
||||||
|
# set PYTHONPATH, so the build system doesn't silently skip installing ceph-volume and others
|
||||||
|
export PYTHONPATH=${ceph-python-env}/${sitePackages}:$lib/${sitePackages}:$out/${sitePackages}
|
||||||
|
patchShebangs src/
|
||||||
|
'';
|
||||||
|
|
||||||
|
cmakeFlags = [
|
||||||
|
"-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib"
|
||||||
|
|
||||||
|
"-DWITH_CEPHFS_SHELL:BOOL=ON"
|
||||||
|
"-DWITH_SYSTEMD:BOOL=OFF"
|
||||||
|
# `WITH_JAEGER` requires `thrift` as a dependency (fine), but the build fails with:
|
||||||
|
# CMake Error at src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-Release.cmake:49 (message):
|
||||||
|
# Command failed: 2
|
||||||
|
#
|
||||||
|
# 'make' 'opentelemetry_trace' 'opentelemetry_exporter_jaeger_trace'
|
||||||
|
#
|
||||||
|
# See also
|
||||||
|
#
|
||||||
|
# /build/ceph-18.2.0/build/src/opentelemetry-cpp/src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-*.log
|
||||||
|
# and that file contains:
|
||||||
|
# /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc: In member function 'virtual void opentelemetry::v1::exporter::jaeger::TUDPTransport::close()':
|
||||||
|
# /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc:71:7: error: '::close' has not been declared; did you mean 'pclose'?
|
||||||
|
# 71 | ::THRIFT_CLOSESOCKET(socket_);
|
||||||
|
# | ^~~~~~~~~~~~~~~~~~
|
||||||
|
# Looks like `close()` is somehow not included.
|
||||||
|
# But the relevant code is already removed in `open-telemetry` 1.10: https://github.com/open-telemetry/opentelemetry-cpp/pull/2031
|
||||||
|
# So it's probably not worth trying to fix that for this Ceph version,
|
||||||
|
# and instead just disable Ceph's Jaeger support.
|
||||||
|
"-DWITH_JAEGER:BOOL=OFF"
|
||||||
|
"-DWITH_TESTS:BOOL=OFF"
|
||||||
|
|
||||||
|
# Use our own libraries, where possible
|
||||||
|
"-DWITH_SYSTEM_ARROW:BOOL=ON" # Only used if other options enable Arrow support.
|
||||||
|
"-DWITH_SYSTEM_BOOST:BOOL=ON"
|
||||||
|
"-DWITH_SYSTEM_GTEST:BOOL=ON"
|
||||||
|
"-DWITH_SYSTEM_ROCKSDB:BOOL=ON"
|
||||||
|
"-DWITH_SYSTEM_UTF8PROC:BOOL=ON"
|
||||||
|
"-DWITH_SYSTEM_ZSTD:BOOL=ON"
|
||||||
|
|
||||||
|
# TODO breaks with sandbox, tries to download stuff with npm
|
||||||
|
"-DWITH_MGR_DASHBOARD_FRONTEND:BOOL=OFF"
|
||||||
|
# WITH_XFS has been set default ON from Ceph 16, keeping it optional in nixpkgs for now
|
||||||
|
''-DWITH_XFS=${if optLibxfs != null then "ON" else "OFF"}''
|
||||||
|
] ++ lib.optional stdenv.isLinux "-DWITH_SYSTEM_LIBURING=ON";
|
||||||
|
|
||||||
|
postFixup = ''
|
||||||
|
wrapPythonPrograms
|
||||||
|
wrapProgram $out/bin/ceph-mgr --prefix PYTHONPATH ":" "$(toPythonPath ${placeholder "out"}):$(toPythonPath ${ceph-python-env})"
|
||||||
|
|
||||||
|
# Test that ceph-volume exists since the build system has a tendency to
|
||||||
|
# silently drop it with misconfigurations.
|
||||||
|
test -f $out/bin/ceph-volume
|
||||||
|
'';
|
||||||
|
|
||||||
|
outputs = [ "out" "lib" "dev" "doc" "man" ];
|
||||||
|
|
||||||
|
doCheck = false; # uses pip to install things from the internet
|
||||||
|
|
||||||
|
# Takes 7+h to build with 2 cores.
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
|
||||||
|
meta = getMeta "Distributed storage system";
|
||||||
|
|
||||||
|
passthru = {
|
||||||
|
inherit version;
|
||||||
|
tests = {
|
||||||
|
inherit (nixosTests)
|
||||||
|
ceph-multi-node
|
||||||
|
ceph-single-node
|
||||||
|
ceph-single-node-bluestore;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
ceph-client = runCommand "ceph-client-${version}" {
|
||||||
|
meta = getMeta "Tools needed to mount Ceph's RADOS Block Devices/Cephfs";
|
||||||
|
} ''
|
||||||
|
mkdir -p $out/{bin,etc,${sitePackages},share/bash-completion/completions}
|
||||||
|
cp -r ${ceph}/bin/{ceph,.ceph-wrapped,rados,rbd,rbdmap} $out/bin
|
||||||
|
cp -r ${ceph}/bin/ceph-{authtool,conf,dencoder,rbdnamer,syn} $out/bin
|
||||||
|
cp -r ${ceph}/bin/rbd-replay* $out/bin
|
||||||
|
cp -r ${ceph}/sbin/mount.ceph $out/bin
|
||||||
|
cp -r ${ceph}/sbin/mount.fuse.ceph $out/bin
|
||||||
|
ln -s bin $out/sbin
|
||||||
|
cp -r ${ceph}/${sitePackages}/* $out/${sitePackages}
|
||||||
|
cp -r ${ceph}/etc/bash_completion.d $out/share/bash-completion/completions
|
||||||
|
# wrapPythonPrograms modifies .ceph-wrapped, so lets just update its paths
|
||||||
|
substituteInPlace $out/bin/ceph --replace ${ceph} $out
|
||||||
|
substituteInPlace $out/bin/.ceph-wrapped --replace ${ceph} $out
|
||||||
|
'';
|
||||||
|
}
|
@ -1,36 +0,0 @@
|
|||||||
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
|
|
||||||
index 33e88bc..ee3641c 100644
|
|
||||||
--- a/src/util/mpir_hwtopo.c
|
|
||||||
+++ b/src/util/mpir_hwtopo.c
|
|
||||||
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
|
|
||||||
#ifdef HAVE_HWLOC
|
|
||||||
bindset = hwloc_bitmap_alloc();
|
|
||||||
hwloc_topology_init(&hwloc_topology);
|
|
||||||
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
|
|
||||||
- if (xmlfile != NULL) {
|
|
||||||
- int rc;
|
|
||||||
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
|
|
||||||
- if (rc == 0) {
|
|
||||||
- /* To have hwloc still actually call OS-specific hooks, the
|
|
||||||
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
|
|
||||||
- * file is really the underlying system. */
|
|
||||||
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
|
|
||||||
- }
|
|
||||||
- MPL_free(xmlfile);
|
|
||||||
- }
|
|
||||||
|
|
||||||
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
|
|
||||||
if (!hwloc_topology_load(hwloc_topology))
|
|
||||||
|
|
||||||
--- a/src/mpi/init/local_proc_attrs.c
|
|
||||||
+++ b/src/mpi/init/local_proc_attrs.c
|
|
||||||
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
|
|
||||||
/* Set the number of tag bits. The device may override this value. */
|
|
||||||
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
|
|
||||||
|
|
||||||
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
|
|
||||||
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
|
|
||||||
- MPL_free(requested_kinds);
|
|
||||||
-
|
|
||||||
return mpi_errno;
|
|
||||||
}
|
|
@ -1,45 +1,35 @@
|
|||||||
final: prev:
|
final: prev:
|
||||||
{
|
{
|
||||||
# Set MPICH as default
|
bsc = prev.bsc.extend (bscFinal: bscPrev: {
|
||||||
mpi = final.mpich;
|
# Set MPICH as default
|
||||||
|
mpi = bscFinal.mpich;
|
||||||
|
|
||||||
# Configure the network for MPICH
|
# Configure the network for MPICH
|
||||||
mpich = with final; let
|
mpich = with final; prev.mpich.overrideAttrs (old: {
|
||||||
# pmix comes with the libraries in .out and headers in .dev
|
buildInput = old.buildInputs ++ [
|
||||||
pmixAll = symlinkJoin {
|
libfabric
|
||||||
name = "pmix-all";
|
pmix
|
||||||
paths = [ pmix.dev pmix.out ];
|
];
|
||||||
};
|
configureFlags = [
|
||||||
in prev.mpich.overrideAttrs (old: {
|
"--enable-shared"
|
||||||
patches = [
|
"--enable-sharedlib"
|
||||||
# See https://github.com/pmodels/mpich/issues/6946
|
"--with-pm=no"
|
||||||
./mpich-fix-hwtopo.patch
|
"--with-device=ch4:ofi"
|
||||||
];
|
"--with-pmi=pmix"
|
||||||
buildInput = old.buildInputs ++ [
|
"--with-pmix=${final.pmix}"
|
||||||
libfabric
|
"--with-libfabric=${final.libfabric}"
|
||||||
pmixAll
|
"--enable-g=log"
|
||||||
];
|
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
|
||||||
configureFlags = [
|
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
|
||||||
"--enable-shared"
|
"FCFLAGS=-fallow-argument-mismatch"
|
||||||
"--enable-sharedlib"
|
];
|
||||||
"--with-pm=no"
|
});
|
||||||
"--with-device=ch4:ofi"
|
|
||||||
"--with-pmi=pmix"
|
|
||||||
"--with-pmix=${pmixAll}"
|
|
||||||
"--with-libfabric=${libfabric}"
|
|
||||||
"--enable-g=log"
|
|
||||||
] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
|
|
||||||
"FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
|
|
||||||
"FCFLAGS=-fallow-argument-mismatch"
|
|
||||||
];
|
|
||||||
});
|
});
|
||||||
|
|
||||||
slurm = prev.slurm.overrideAttrs (old: {
|
# Update ceph to 18.2.0 until it lands in nixpkgs, see:
|
||||||
patches = (old.patches or []) ++ [
|
# https://github.com/NixOS/nixpkgs/pull/247849
|
||||||
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
|
inherit (prev.callPackage ./ceph.nix {
|
||||||
./slurm-rank-expansion.patch
|
lua = prev.lua5_4;
|
||||||
];
|
fmt = prev.fmt_8;
|
||||||
});
|
}) ceph ceph-client;
|
||||||
|
|
||||||
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
|
|
||||||
}
|
}
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
{ buildGoModule, fetchFromGitHub, lib }:
|
|
||||||
|
|
||||||
buildGoModule rec {
|
|
||||||
pname = "prometheus-slurm-exporter";
|
|
||||||
version = "0.20";
|
|
||||||
|
|
||||||
src = fetchFromGitHub {
|
|
||||||
rev = version;
|
|
||||||
owner = "vpenso";
|
|
||||||
repo = pname;
|
|
||||||
sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs=";
|
|
||||||
};
|
|
||||||
|
|
||||||
vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI=";
|
|
||||||
doCheck = false;
|
|
||||||
|
|
||||||
meta = with lib; {
|
|
||||||
description = "Prometheus SLURM Exporter";
|
|
||||||
homepage = "https://github.com/vpenso/prometheus-slurm-exporter";
|
|
||||||
platforms = platforms.linux;
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,11 +0,0 @@
|
|||||||
--- a/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:05:24.815313882 +0100
|
|
||||||
+++ b/src/plugins/mpi/pmix/pmixp_dmdx.c 2024-03-15 13:09:53.936900823 +0100
|
|
||||||
@@ -314,7 +314,7 @@ static void _dmdx_req(buf_t *buf, int no
|
|
||||||
}
|
|
||||||
|
|
||||||
nsptr = pmixp_nspaces_local();
|
|
||||||
- if (nsptr->ntasks <= rank) {
|
|
||||||
+ if ((long) nsptr->ntasks <= (long) rank) {
|
|
||||||
char *nodename = pmixp_info_job_host(nodeid);
|
|
||||||
PMIXP_ERROR("Bad request from %s: nspace \"%s\" has only %d ranks, asked for %d",
|
|
||||||
nodename, ns, nsptr->ntasks, rank);
|
|
@ -1,21 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU
|
|
||||||
J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs
|
|
||||||
-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE
|
|
||||||
4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk
|
|
||||||
-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4
|
|
||||||
Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs
|
|
||||||
-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg
|
|
||||||
LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo
|
|
||||||
-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0
|
|
||||||
ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8
|
|
||||||
-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0
|
|
||||||
T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E
|
|
||||||
-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc
|
|
||||||
MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok
|
|
||||||
-> ~8m;7f-grease
|
|
||||||
lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/
|
|
||||||
G/X4EA
|
|
||||||
--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA
|
|
||||||
ÊN Å$[H˜ÝQËéŠ
|
|
||||||
d£š·'±ö7…·Í²)ÖØÀÊx9yüÐëE¡þÓM7^Ø[ÐMŽ+É&éâö½$8tM¨Ð²
|
|
@ -1,9 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA
|
|
||||||
Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs
|
|
||||||
-> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8
|
|
||||||
7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw
|
|
||||||
-> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I
|
|
||||||
GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU
|
|
||||||
--- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8
|
|
||||||
1¬Mw4‘Í ì:Hµ@Á/ägLtMÇ,ßÆ¥ô*¡žzñNV5ˆm‚ÍNŽoÞáj1$÷TøG_³E{Œ%“‰1ǯ‘<>H£îAÛp™
|
|
@ -1,9 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ
|
|
||||||
jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY
|
|
||||||
-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU
|
|
||||||
8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg
|
|
||||||
-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA
|
|
||||||
SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8
|
|
||||||
--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18
|
|
||||||
ä·ù¼Â‡%å]yi"ô<>»LÓâùH`ªa$Æþ)¦9ve<76>.0úmÉK<EFBFBD>vƒÀïu"|1cÞ-%ÔÕ"åWFï¡ÞA«<41>hº$•ºj<eñ¶xÅLx«ç.?œÈâ:L…¬–ƒ,ëu»|³‹F|Õi²äÔ
|
|
Binary file not shown.
@ -1,10 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 HY2yRg 3L1Y5upc5qN6fgiFAox5rD/W8n0eQUv5mT39QAdO5Ac
|
|
||||||
XkWsmPmzRgHjsvJgsDKJRgHZ7/sBZFmd1Doppj/y390
|
|
||||||
-> ssh-ed25519 CAWG4Q v03Qr+fckdIpsxvQG/viKxlF8WNpO4XUe//QcPzH4k0
|
|
||||||
afUwi3ccDCRfUxPDdF7ZkoL+0UX1XwqVtiyabDWjVQk
|
|
||||||
-> ssh-ed25519 MSF3dg c2hEUk4LslJpiL7v/4UpT8fK7ZiBJ8+uRhZ/vBoRUDE
|
|
||||||
YX9EpnJpHo1eDsZtapTVY6jD+81kb588Oik4NoY9jro
|
|
||||||
--- LhUkopNtCsyHCLzEYzBFs+vekOkAR4B3VBaiMF/ZF8w
|
|
||||||
oË<EFBFBD>…×à»ÂC‚ßHãáàùýy—LØ”ItMèÕåµI×±sMÆ\Í1-±K”ˆ¤‰G:õ™<02>¦
|
|
||||||
ÝgáºÙbpF¼Ó¶Í%Y·
|
|
Binary file not shown.
@ -1,12 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 HY2yRg d144D+VvxhYgKtH//uD2qNuVnYX6bh74YqkyM3ZjBwU
|
|
||||||
0IeVmFAf4U8Sm0d01O6ZwJ1V2jl/mSMl4wF0MP5LrIg
|
|
||||||
-> ssh-ed25519 CAWG4Q H4nKxue/Cj/3KUF5A+/ygHMjjArwgx3SIWwXcqFtyUo
|
|
||||||
4k5NJkLUrueLYiPkr2LAwQLWmuaOIsDmV/86ravpleU
|
|
||||||
-> ssh-ed25519 MSF3dg HpgUAFHLPs4w0cdJHqTwf8lySkTeV9O9NnBf49ClDHs
|
|
||||||
foPIUUgAYe1YSDy6+aMfjN7xv9xud9fDmhRlIztHoEo
|
|
||||||
-> vLkF\<-grease
|
|
||||||
3GRT+W8gYSpjl/a6Ix9+g9UJnTpl1ZH/oucfR801vfE8y77DV2Jxz/XJwzxYxKG5
|
|
||||||
YEhiTGMNbXw/V7E5aVSz6Bdc
|
|
||||||
--- GtiHKCZdHByq9j0BSLd544PhbEwTN138E8TFdxipeiA
|
|
||||||
¥¿£‹„ÝG$Sº¼ƒRAæÀ¾Th]nÄ8<C384>,ùHœsÈïÚ=p¼™Ù'»<>ô+ôjõÓõŒ9±)ñ:”)‘¸œYâþÑ8³IØõ8:ol<6F>ë’<1F>åÃZÐæ3–PM”F;ÊrYõ“ÞÛ<1F>$¨y¸LâÙœ¦ÎœàÕUús16Ǿ¡LŒb÷¨²
|
|
@ -1,17 +0,0 @@
|
|||||||
let
|
|
||||||
keys = import ../keys.nix;
|
|
||||||
adminsKeys = builtins.attrValues keys.admins;
|
|
||||||
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
|
||||||
# Only expose ceph keys to safe nodes and admins
|
|
||||||
safe = keys.hostGroup.safe ++ adminsKeys;
|
|
||||||
in
|
|
||||||
{
|
|
||||||
"gitea-runner-token.age".publicKeys = hut;
|
|
||||||
"gitlab-runner-docker-token.age".publicKeys = hut;
|
|
||||||
"gitlab-runner-shell-token.age".publicKeys = hut;
|
|
||||||
"nix-serve.age".publicKeys = hut;
|
|
||||||
"jungle-robot-password.age".publicKeys = hut;
|
|
||||||
|
|
||||||
"ceph-user.age".publicKeys = safe;
|
|
||||||
"munge-key.age".publicKeys = safe;
|
|
||||||
}
|
|
@ -17,6 +17,6 @@ Then, to request access to the machines we will need some information about you:
|
|||||||
1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
|
1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
|
||||||
1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`)
|
1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`)
|
||||||
|
|
||||||
Send an email to <jungle@bsc.es> with the details, or directly open a
|
You can send us both an email at <rodrigo.arias@bsc.es> and
|
||||||
merge request in the [jungle
|
<aleix.rocanonell@bsc.es> with the details, or directly open a merge request in
|
||||||
repository](https://pm.bsc.es/gitlab/rarias/jungle/).
|
the [jungle repository](https://pm.bsc.es/gitlab/rarias/jungle/).
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
---
|
|
||||||
title: "Git"
|
|
||||||
description: "Gitea instance"
|
|
||||||
---
|
|
||||||
|
|
||||||
If you are reading this page, the proxy to the Gitea service is not working.
|
|
@ -1,6 +0,0 @@
|
|||||||
---
|
|
||||||
title: "Lists"
|
|
||||||
description: "Mailing lists"
|
|
||||||
---
|
|
||||||
|
|
||||||
If you are reading this page, the proxy to the public-inbox service is not working.
|
|
@ -8,11 +8,3 @@ description: "Low system noise"
|
|||||||
Much like the silent flight of an owl at night, these nodes are configured to
|
Much like the silent flight of an owl at night, these nodes are configured to
|
||||||
minimize the system noise and let programs run undisturbed. The list of nodes is
|
minimize the system noise and let programs run undisturbed. The list of nodes is
|
||||||
`owl[1-2]` and are available for jobs with SLURM.
|
`owl[1-2]` and are available for jobs with SLURM.
|
||||||
|
|
||||||
The contents of the nix store of the hut node are made available in the owl nodes
|
|
||||||
when a job is running. This allows jobs to access the same paths that are on hut
|
|
||||||
to load dependencies.
|
|
||||||
|
|
||||||
For now, only the hut node can be used to build new derivations so that they
|
|
||||||
appear in the compute nodes. This applies to the `nix build`, `nix develop` and
|
|
||||||
`nix shell` commands.
|
|
||||||
|
@ -1,71 +0,0 @@
|
|||||||
---
|
|
||||||
title: "Update 2023-09-12"
|
|
||||||
author: "Rodrigo Arias Mallo"
|
|
||||||
date: 2023-09-12
|
|
||||||
---
|
|
||||||
|
|
||||||
This is a summary of notable changes introduced in the jungle cluster in the
|
|
||||||
last months.
|
|
||||||
|
|
||||||
### New Ceph filesystem available
|
|
||||||
|
|
||||||
We have installed the latest [Ceph filesystem][1] (18.2.0) which stores three
|
|
||||||
redundant copies of the data so a failure in one disk doesn't cause data loss.
|
|
||||||
It is mounted in /ceph and available for use in the owl1, owl2 and hut
|
|
||||||
nodes. For now it provides 2.8 TiB of space and it is expected to
|
|
||||||
increase when the last storage node is installed.
|
|
||||||
|
|
||||||
[1]: https://en.wikipedia.org/wiki/Ceph_(software)
|
|
||||||
|
|
||||||
The throughput is limited by the 1 Gigabit Ethernet speed, but should be
|
|
||||||
reasonably fast for most workloads. Here is a test with dd which reaches the
|
|
||||||
network limit:
|
|
||||||
|
|
||||||
```txt
|
|
||||||
hut% dd if=/dev/urandom of=/ceph/rarias/urandom bs=1M count=1024
|
|
||||||
1024+0 records in
|
|
||||||
1024+0 records out
|
|
||||||
1073741824 bytes (1,1 GB, 1,0 GiB) copied, 8,98544 s, 119 MB/s
|
|
||||||
```
|
|
||||||
|
|
||||||
### SLURM power save
|
|
||||||
|
|
||||||
The SLURM daemon has been configured to power down the nodes after one hour of
|
|
||||||
idling. When a new job is allocated to a node that is powered off, it is
|
|
||||||
automatically turned on and as soon as it becomes available it will execute the
|
|
||||||
job. Here is an example with two nodes that boot and execute a simple job that
|
|
||||||
shows the date.
|
|
||||||
|
|
||||||
```txt
|
|
||||||
hut% date; srun -N 2 date
|
|
||||||
2023-09-12T17:36:09 CEST
|
|
||||||
2023-09-12T17:38:26 CEST
|
|
||||||
2023-09-12T17:38:18 CEST
|
|
||||||
```
|
|
||||||
|
|
||||||
You can expect a similar delay (around 2-3 min) while the nodes are starting.
|
|
||||||
Notice that while the nodes are kept on, the delay is not noticeable:
|
|
||||||
|
|
||||||
```txt
|
|
||||||
hut% date; srun -N 2 date
|
|
||||||
2023-09-12T17:40:04 CEST
|
|
||||||
2023-09-12T17:40:04 CEST
|
|
||||||
2023-09-12T17:40:04 CEST
|
|
||||||
```
|
|
||||||
|
|
||||||
### Power and temperature monitoring
|
|
||||||
|
|
||||||
In the cluster, we monitor the temperature and the power draw of all nodes. This
|
|
||||||
allows us to understand which machines are not being used and turn them off to
|
|
||||||
save energy that otherwise would be wasted. Here is an example where some nodes
|
|
||||||
are powered off to save energy:
|
|
||||||
|
|
||||||
![power](./power.png)
|
|
||||||
|
|
||||||
We also configured the nodes to work at low CPU frequencies, so the temperature
|
|
||||||
is kept low to increase the lifespan of the node components. Towards these
|
|
||||||
goals, we have configured two alerts that trigger when the CPUs of a node
|
|
||||||
exceed the limit temperature of 80 °C or when the power draw exceeds 350 W.
|
|
||||||
|
|
||||||
By keeping the power consumption and temperatures controlled, we can safely
|
|
||||||
incorporate more machines that will only be used on demand.
|
|
Binary file not shown.
Before Width: | Height: | Size: 58 KiB |
@ -2,27 +2,4 @@ baseURL = 'https://jungle.bsc.es/'
|
|||||||
languageCode = 'en-us'
|
languageCode = 'en-us'
|
||||||
title = 'The jungle'
|
title = 'The jungle'
|
||||||
theme = 'PaperMod'
|
theme = 'PaperMod'
|
||||||
|
sectionPagesMenu = "main"
|
||||||
[[menu.main]]
|
|
||||||
identifier = "grafana"
|
|
||||||
name = "Grafana"
|
|
||||||
url = "/grafana/"
|
|
||||||
weight = 10
|
|
||||||
|
|
||||||
[[menu.main]]
|
|
||||||
identifier = "Git"
|
|
||||||
name = "Git"
|
|
||||||
url = "/git/"
|
|
||||||
weight = 20
|
|
||||||
|
|
||||||
[[menu.main]]
|
|
||||||
identifier = "Lists"
|
|
||||||
name = "Lists"
|
|
||||||
url = "/lists/"
|
|
||||||
weight = 30
|
|
||||||
|
|
||||||
[[menu.main]]
|
|
||||||
identifier = "Posts"
|
|
||||||
name = "Posts"
|
|
||||||
url = "/posts/"
|
|
||||||
weight = 40
|
|
||||||
|
@ -100,9 +100,9 @@
|
|||||||
{{- $separator := or $label_text (not site.Params.disableThemeToggle)}}
|
{{- $separator := or $label_text (not site.Params.disableThemeToggle)}}
|
||||||
{{- with site.Home.AllTranslations }}
|
{{- with site.Home.AllTranslations }}
|
||||||
<ul class="lang-switch">
|
<ul class="lang-switch">
|
||||||
|
{{- if $separator }}<li>|</li>{{ end }}
|
||||||
{{- range . -}}
|
{{- range . -}}
|
||||||
{{- if ne $lang .Lang }}
|
{{- if ne $lang .Lang }}
|
||||||
{{- if $separator }}<li>|</li>{{ end }}
|
|
||||||
<li>
|
<li>
|
||||||
<a href="{{- .Permalink -}}" title="{{ .Language.Params.languageAltTitle | default (.Language.LanguageName | emojify) | default (.Lang | title) }}"
|
<a href="{{- .Permalink -}}" title="{{ .Language.Params.languageAltTitle | default (.Language.LanguageName | emojify) | default (.Lang | title) }}"
|
||||||
aria-label="{{ .Language.LanguageName | default (.Lang | title) }}">
|
aria-label="{{ .Language.LanguageName | default (.Lang | title) }}">
|
||||||
|
@ -17,5 +17,5 @@
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- with ($scratch.Get "meta") }}
|
{{- with ($scratch.Get "meta") }}
|
||||||
{{- delimit . " · " | safeHTML -}}
|
{{- delimit . " · " -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
Loading…
Reference in New Issue
Block a user