Compare commits
39 Commits
lake2-ipoi
...
intro-nix
| Author | SHA1 | Date | |
|---|---|---|---|
| 4da36899f5 | |||
| 3906876a69 | |||
| fdc6445d47 | |||
| e88805947e | |||
| aaefddc44a | |||
| d9d249411d | |||
| c07f75c6bb | |||
| 8d449ba20c | |||
| 10ca572aec | |||
| 75b0f48715 | |||
| 19a451db77 | |||
| ec9be9bb62 | |||
| 7ddd1977f3 | |||
| 7050c505b5 | |||
| 033a1fe97b | |||
| 77cb3c494e | |||
| 6db5772ac4 | |||
| 3e347e673c | |||
| dca274d020 | |||
| c33909f32f | |||
| 64e856e8b9 | |||
| 02f40a8217 | |||
| 77d43b6da9 | |||
| ab55aac5ff | |||
| 9b5bfbb7a3 | |||
| a69a71d1b0 | |||
| 98374bd303 | |||
| 3b6be8a2fc | |||
| 2bb366b9ac | |||
| 2d16709648 | |||
| 9344daa31c | |||
| 80c98041b5 | |||
| 3418e57907 | |||
| 6848b58e39 | |||
| 13a70411aa | |||
| f9c77b433a | |||
| 9d487845f6 | |||
| 3c99c2a662 | |||
| 7d09108c9f |
19
flake.lock
generated
19
flake.lock
generated
@@ -23,12 +23,17 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"bscpkgs": {
|
"bscpkgs": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1690560045,
|
"lastModified": 1694077645,
|
||||||
"narHash": "sha256-39ZP+FIzlWoN3c43hReBYpStg4RLYw/z7TdxCQmOvTM=",
|
"narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=",
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "b4a20d7c3af854b39682484adfd1c7979319f439",
|
"rev": "6122fef92701701e1a0622550ac0fc5c2beb5906",
|
||||||
"revCount": 841,
|
"revCount": 860,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
|
"url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
|
||||||
},
|
},
|
||||||
@@ -82,11 +87,11 @@
|
|||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1692447944,
|
"lastModified": 1693663421,
|
||||||
"narHash": "sha256-fkJGNjEmTPvqBs215EQU4r9ivecV5Qge5cF/QDLVn3U=",
|
"narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "d680ded26da5cf104dd2735a51e88d2d8f487b4d",
|
"rev": "e56990880811a451abd32515698c712788be5720",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
agenix.url = "github:ryantm/agenix";
|
agenix.url = "github:ryantm/agenix";
|
||||||
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
||||||
bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git";
|
bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git";
|
||||||
|
bscpkgs.inputs.nixpkgs.follows = "nixpkgs";
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
|
outputs = { self, nixpkgs, agenix, bscpkgs, ... }:
|
||||||
|
|||||||
29
keys.nix
Normal file
29
keys.nix
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# As agenix needs to parse the secrets from a standalone .nix file, we describe
|
||||||
|
# here all the public keys
|
||||||
|
rec {
|
||||||
|
hosts = {
|
||||||
|
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||||
|
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||||
|
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||||
|
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||||
|
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||||
|
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||||
|
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||||
|
};
|
||||||
|
|
||||||
|
hostGroup = with hosts; rec {
|
||||||
|
compute = [ owl1 owl2 ];
|
||||||
|
playground = [ eudy koro ];
|
||||||
|
storage = [ bay lake2 ];
|
||||||
|
monitor = [ hut ];
|
||||||
|
|
||||||
|
system = storage ++ monitor;
|
||||||
|
safe = system ++ compute;
|
||||||
|
all = safe ++ playground;
|
||||||
|
};
|
||||||
|
|
||||||
|
admins = {
|
||||||
|
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||||
|
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
||||||
|
};
|
||||||
|
}
|
||||||
9
m/common/agenix.nix
Normal file
9
m/common/agenix.nix
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{ agenix, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ agenix.nixosModules.default ];
|
||||||
|
|
||||||
|
environment.systemPackages = [
|
||||||
|
agenix.packages.x86_64-linux.default
|
||||||
|
];
|
||||||
|
}
|
||||||
@@ -6,6 +6,9 @@
|
|||||||
fsType = "ext4";
|
fsType = "ext4";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Trim unused blocks weekly
|
||||||
|
services.fstrim.enable = true;
|
||||||
|
|
||||||
swapDevices =
|
swapDevices =
|
||||||
[ { device = "/dev/disk/by-label/swap"; }
|
[ { device = "/dev/disk/by-label/swap"; }
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
|
./agenix.nix
|
||||||
./boot.nix
|
./boot.nix
|
||||||
./fs.nix
|
./fs.nix
|
||||||
./hw.nix
|
./hw.nix
|
||||||
@@ -10,6 +11,9 @@
|
|||||||
./slurm.nix
|
./slurm.nix
|
||||||
./ssh.nix
|
./ssh.nix
|
||||||
./users.nix
|
./users.nix
|
||||||
|
./watchdog.nix
|
||||||
|
./rev.nix
|
||||||
|
./zsh.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
nixpkgs.overlays = [
|
nixpkgs.overlays = [
|
||||||
@@ -17,6 +21,11 @@
|
|||||||
(import ../../pkgs/overlay.nix)
|
(import ../../pkgs/overlay.nix)
|
||||||
];
|
];
|
||||||
|
|
||||||
|
system.configurationRevision =
|
||||||
|
if theFlake ? rev
|
||||||
|
then theFlake.rev
|
||||||
|
else throw ("Refusing to build from a dirty Git tree!");
|
||||||
|
|
||||||
nix.nixPath = [
|
nix.nixPath = [
|
||||||
"nixpkgs=${nixpkgs}"
|
"nixpkgs=${nixpkgs}"
|
||||||
"bscpkgs=${bscpkgs}"
|
"bscpkgs=${bscpkgs}"
|
||||||
@@ -27,11 +36,6 @@
|
|||||||
nix.registry.bscpkgs.flake = bscpkgs;
|
nix.registry.bscpkgs.flake = bscpkgs;
|
||||||
nix.registry.jungle.flake = theFlake;
|
nix.registry.jungle.flake = theFlake;
|
||||||
|
|
||||||
system.configurationRevision =
|
|
||||||
if theFlake ? rev
|
|
||||||
then theFlake.rev
|
|
||||||
else throw ("Refusing to build from a dirty Git tree!");
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||||
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
||||||
@@ -71,9 +75,6 @@
|
|||||||
nix.gc.dates = "weekly";
|
nix.gc.dates = "weekly";
|
||||||
nix.gc.options = "--delete-older-than 30d";
|
nix.gc.options = "--delete-older-than 30d";
|
||||||
|
|
||||||
programs.zsh.enable = true;
|
|
||||||
programs.zsh.histSize = 100000;
|
|
||||||
|
|
||||||
programs.bash.promptInit = ''
|
programs.bash.promptInit = ''
|
||||||
PS1="\h\\$ "
|
PS1="\h\\$ "
|
||||||
'';
|
'';
|
||||||
|
|||||||
@@ -21,9 +21,14 @@
|
|||||||
firewall = {
|
firewall = {
|
||||||
enable = true;
|
enable = true;
|
||||||
allowedTCPPorts = [ 22 ];
|
allowedTCPPorts = [ 22 ];
|
||||||
|
extraCommands = ''
|
||||||
# FIXME: For slurmd as it requests the compute nodes to connect to us
|
# Prevent ssfhead from contacting our slurmd daemon
|
||||||
allowedTCPPortRanges = [ { from=1024; to=65535; } ];
|
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse
|
||||||
|
# But accept traffic to slurm ports from any other node in the subnet
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
||||||
|
# We also need to open the srun port range
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
extraHosts = ''
|
extraHosts = ''
|
||||||
@@ -32,8 +37,8 @@
|
|||||||
|
|
||||||
# Node Entry for node: mds01 (ID=72)
|
# Node Entry for node: mds01 (ID=72)
|
||||||
10.0.40.40 bay mds01 mds01-eth0
|
10.0.40.40 bay mds01 mds01-eth0
|
||||||
10.0.42.40 mds01-ib0
|
10.0.42.40 bay-ib mds01-ib0
|
||||||
10.0.40.141 mds01-ipmi0
|
10.0.40.141 bay-ipmi mds01-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: oss01 (ID=73)
|
# Node Entry for node: oss01 (ID=73)
|
||||||
10.0.40.41 oss01 oss01-eth0
|
10.0.40.41 oss01 oss01-eth0
|
||||||
@@ -42,18 +47,18 @@
|
|||||||
|
|
||||||
# Node Entry for node: oss02 (ID=74)
|
# Node Entry for node: oss02 (ID=74)
|
||||||
10.0.40.42 lake2 oss02 oss02-eth0
|
10.0.40.42 lake2 oss02 oss02-eth0
|
||||||
10.0.42.42 oss02-ib0
|
10.0.42.42 lake2-ib oss02-ib0
|
||||||
10.0.40.143 oss02-ipmi0
|
10.0.40.143 lake2-ipmi oss02-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon01 (ID=15)
|
# Node Entry for node: xeon01 (ID=15)
|
||||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
10.0.40.1 owl1 xeon01 xeon01-eth0
|
||||||
10.0.42.1 xeon01-ib0
|
10.0.42.1 owl1-ib xeon01-ib0
|
||||||
10.0.40.101 xeon01-ipmi0
|
10.0.40.101 owl1-ipmi xeon01-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon02 (ID=16)
|
# Node Entry for node: xeon02 (ID=16)
|
||||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
10.0.40.2 owl2 xeon02 xeon02-eth0
|
||||||
10.0.42.2 xeon02-ib0
|
10.0.42.2 owl2-ib xeon02-ib0
|
||||||
10.0.40.102 xeon02-ipmi0
|
10.0.40.102 owl2-ipmi xeon02-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon03 (ID=17)
|
# Node Entry for node: xeon03 (ID=17)
|
||||||
10.0.40.3 xeon03 xeon03-eth0
|
10.0.40.3 xeon03 xeon03-eth0
|
||||||
@@ -67,8 +72,8 @@
|
|||||||
|
|
||||||
# Node Entry for node: xeon05 (ID=19)
|
# Node Entry for node: xeon05 (ID=19)
|
||||||
10.0.40.5 koro xeon05 xeon05-eth0
|
10.0.40.5 koro xeon05 xeon05-eth0
|
||||||
10.0.42.5 xeon05-ib0
|
10.0.42.5 koro-ib xeon05-ib0
|
||||||
10.0.40.105 xeon05-ipmi0
|
10.0.40.105 koro-ipmi xeon05-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon06 (ID=20)
|
# Node Entry for node: xeon06 (ID=20)
|
||||||
10.0.40.6 xeon06 xeon06-eth0
|
10.0.40.6 xeon06 xeon06-eth0
|
||||||
@@ -77,13 +82,13 @@
|
|||||||
|
|
||||||
# Node Entry for node: xeon07 (ID=21)
|
# Node Entry for node: xeon07 (ID=21)
|
||||||
10.0.40.7 hut xeon07 xeon07-eth0
|
10.0.40.7 hut xeon07 xeon07-eth0
|
||||||
10.0.42.7 xeon07-ib0
|
10.0.42.7 hut-ib xeon07-ib0
|
||||||
10.0.40.107 xeon07-ipmi0
|
10.0.40.107 hut-ipmi xeon07-ipmi0
|
||||||
|
|
||||||
# Node Entry for node: xeon08 (ID=22)
|
# Node Entry for node: xeon08 (ID=22)
|
||||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
10.0.40.8 eudy xeon08 xeon08-eth0
|
||||||
10.0.42.8 xeon08-ib0
|
10.0.42.8 eudy-ib xeon08-ib0
|
||||||
10.0.40.108 xeon08-ipmi0
|
10.0.40.108 eudy-ipmi xeon08-ipmi0
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
18
m/common/rev.nix
Normal file
18
m/common/rev.nix
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{ theFlake, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
rev = if theFlake ? rev then theFlake.rev
|
||||||
|
else throw ("Refusing to build from a dirty Git tree!");
|
||||||
|
in {
|
||||||
|
# Save the commit of the config in /etc/configrev
|
||||||
|
environment.etc.configrev.text = rev + "\n";
|
||||||
|
|
||||||
|
# Keep a log with the config over time
|
||||||
|
system.activationScripts.configRevLog.text = ''
|
||||||
|
BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown)
|
||||||
|
CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown)
|
||||||
|
NEXT=${rev}
|
||||||
|
DATENOW=$(date --iso-8601=seconds)
|
||||||
|
echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log
|
||||||
|
'';
|
||||||
|
}
|
||||||
@@ -1,6 +1,33 @@
|
|||||||
{ lib, ... }:
|
{ config, pkgs, lib, ... }:
|
||||||
|
|
||||||
{
|
let
|
||||||
|
suspendProgram = pkgs.writeScript "suspend.sh" ''
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Shutting down host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
resumeProgram = pkgs.writeScript "resume.sh" ''
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Starting host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
in {
|
||||||
systemd.services.slurmd.serviceConfig = {
|
systemd.services.slurmd.serviceConfig = {
|
||||||
# Kill all processes in the control group on stop/restart. This will kill
|
# Kill all processes in the control group on stop/restart. This will kill
|
||||||
# all the jobs running, so ensure that we only upgrade when the nodes are
|
# all the jobs running, so ensure that we only upgrade when the nodes are
|
||||||
@@ -9,6 +36,7 @@
|
|||||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||||
KillMode = lib.mkForce "control-group";
|
KillMode = lib.mkForce "control-group";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
client.enable = true;
|
client.enable = true;
|
||||||
controlMachine = "hut";
|
controlMachine = "hut";
|
||||||
@@ -18,6 +46,11 @@
|
|||||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
||||||
];
|
];
|
||||||
|
|
||||||
|
partitionName = [
|
||||||
|
"owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP"
|
||||||
|
"all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP"
|
||||||
|
];
|
||||||
|
|
||||||
# See slurm.conf(5) for more details about these options.
|
# See slurm.conf(5) for more details about these options.
|
||||||
extraConfig = ''
|
extraConfig = ''
|
||||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||||
@@ -37,6 +70,30 @@
|
|||||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||||
TaskPlugin=task/affinity,task/cgroup
|
TaskPlugin=task/affinity,task/cgroup
|
||||||
|
|
||||||
|
# Power off unused nodes until they are requested
|
||||||
|
SuspendProgram=${suspendProgram}
|
||||||
|
SuspendTimeout=60
|
||||||
|
ResumeProgram=${resumeProgram}
|
||||||
|
ResumeTimeout=300
|
||||||
|
SuspendExcNodes=hut
|
||||||
|
|
||||||
|
# Turn the nodes off after 1 hour of inactivity
|
||||||
|
SuspendTime=3600
|
||||||
|
|
||||||
|
# Reduce port range so we can allow only this range in the firewall
|
||||||
|
SrunPortRange=60000-61000
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
age.secrets.mungeKey = {
|
||||||
|
file = ../../secrets/munge-key.age;
|
||||||
|
owner = "munge";
|
||||||
|
group = "munge";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.munge = {
|
||||||
|
enable = true;
|
||||||
|
password = config.age.secrets.mungeKey.path;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
{ ... }:
|
{ lib, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
keys = import ../../keys.nix;
|
||||||
|
hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts;
|
||||||
|
in
|
||||||
{
|
{
|
||||||
# Enable the OpenSSH daemon.
|
# Enable the OpenSSH daemon.
|
||||||
services.openssh.enable = true;
|
services.openssh.enable = true;
|
||||||
@@ -11,13 +15,7 @@
|
|||||||
ProxyCommand nc -X connect -x localhost:23080 %h %p
|
ProxyCommand nc -X connect -x localhost:23080 %h %p
|
||||||
'';
|
'';
|
||||||
|
|
||||||
programs.ssh.knownHosts = {
|
programs.ssh.knownHosts = hostsKeys // {
|
||||||
"hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1";
|
|
||||||
"owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv";
|
|
||||||
"owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK";
|
|
||||||
"eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG";
|
|
||||||
"koro".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67";
|
|
||||||
|
|
||||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{ ... }:
|
{ pkgs, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
users = {
|
users = {
|
||||||
@@ -26,6 +26,7 @@
|
|||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio"
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal"
|
||||||
];
|
];
|
||||||
|
shell = pkgs.zsh;
|
||||||
};
|
};
|
||||||
|
|
||||||
arocanon = {
|
arocanon = {
|
||||||
@@ -53,6 +54,18 @@
|
|||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
anavarro = {
|
||||||
|
uid = 1037;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/anavarro";
|
||||||
|
description = "Antoni Navarro";
|
||||||
|
group = "Computational";
|
||||||
|
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
||||||
|
];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
groups = {
|
groups = {
|
||||||
|
|||||||
9
m/common/watchdog.nix
Normal file
9
m/common/watchdog.nix
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
# The boards have a BMC watchdog controlled by IPMI
|
||||||
|
boot.kernelModules = [ "ipmi_watchdog" ];
|
||||||
|
|
||||||
|
# Enable systemd watchdog with 30 s interval
|
||||||
|
systemd.watchdog.runtimeTime = "30s";
|
||||||
|
}
|
||||||
92
m/common/zsh.nix
Normal file
92
m/common/zsh.nix
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
{ pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
direnv
|
||||||
|
zsh-completions
|
||||||
|
nix-zsh-completions
|
||||||
|
];
|
||||||
|
|
||||||
|
programs.zsh = {
|
||||||
|
enable = true;
|
||||||
|
histSize = 1000000;
|
||||||
|
|
||||||
|
shellInit = ''
|
||||||
|
# Disable new user prompt
|
||||||
|
if [ ! -e ~/.zshrc ]; then
|
||||||
|
touch ~/.zshrc
|
||||||
|
fi
|
||||||
|
'';
|
||||||
|
|
||||||
|
promptInit = ''
|
||||||
|
# Note that to manually override this in ~/.zshrc you should run `prompt off`
|
||||||
|
# before setting your PS1, etc. Otherwise this is likely to interact with
|
||||||
|
# your ~/.zshrc configuration in unexpected ways as the default prompt sets
|
||||||
|
# a lot of different prompt variables.
|
||||||
|
autoload -U promptinit && promptinit && prompt default && setopt prompt_sp
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Taken from Ulli Kehrle config:
|
||||||
|
# https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205
|
||||||
|
interactiveShellInit = ''
|
||||||
|
source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh"
|
||||||
|
|
||||||
|
# Save history immediately, but only load it when the shell starts
|
||||||
|
setopt inc_append_history
|
||||||
|
|
||||||
|
# dircolors doesn't support alacritty:
|
||||||
|
# https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html
|
||||||
|
export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:';
|
||||||
|
|
||||||
|
# From Arch Linux and GRML
|
||||||
|
bindkey "^R" history-incremental-pattern-search-backward
|
||||||
|
bindkey "^S" history-incremental-pattern-search-forward
|
||||||
|
|
||||||
|
# Auto rehash for new binaries
|
||||||
|
zstyle ':completion:*' rehash true
|
||||||
|
# show a nice menu with the matches
|
||||||
|
zstyle ':completion:*' menu yes select
|
||||||
|
|
||||||
|
bindkey '^[OA' history-substring-search-up # Up
|
||||||
|
bindkey '^[[A' history-substring-search-up # Up
|
||||||
|
|
||||||
|
bindkey '^[OB' history-substring-search-down # Down
|
||||||
|
bindkey '^[[B' history-substring-search-down # Down
|
||||||
|
|
||||||
|
bindkey '\e[1~' beginning-of-line # Home
|
||||||
|
bindkey '\e[7~' beginning-of-line # Home
|
||||||
|
bindkey '\e[H' beginning-of-line # Home
|
||||||
|
bindkey '\eOH' beginning-of-line # Home
|
||||||
|
|
||||||
|
bindkey '\e[4~' end-of-line # End
|
||||||
|
bindkey '\e[8~' end-of-line # End
|
||||||
|
bindkey '\e[F ' end-of-line # End
|
||||||
|
bindkey '\eOF' end-of-line # End
|
||||||
|
|
||||||
|
bindkey '^?' backward-delete-char # Backspace
|
||||||
|
bindkey '\e[3~' delete-char # Del
|
||||||
|
# bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del
|
||||||
|
bindkey '\e[2~' overwrite-mode # Ins
|
||||||
|
|
||||||
|
bindkey '^H' backward-kill-word # C-Backspace
|
||||||
|
|
||||||
|
bindkey '5~' kill-word # C-Del
|
||||||
|
bindkey '^[[3;5~' kill-word # C-Del
|
||||||
|
bindkey '^[[3^' kill-word # C-Del
|
||||||
|
|
||||||
|
bindkey "^[[1;5H" backward-kill-line # C-Home
|
||||||
|
bindkey "^[[7^" backward-kill-line # C-Home
|
||||||
|
|
||||||
|
bindkey "^[[1;5F" kill-line # C-End
|
||||||
|
bindkey "^[[8^" kill-line # C-End
|
||||||
|
|
||||||
|
bindkey '^[[1;5C' forward-word # C-Right
|
||||||
|
bindkey '^[0c' forward-word # C-Right
|
||||||
|
bindkey '^[[5C' forward-word # C-Right
|
||||||
|
|
||||||
|
bindkey '^[[1;5D' backward-word # C-Left
|
||||||
|
bindkey '^[0d' backward-word # C-Left
|
||||||
|
bindkey '^[[5D' backward-word # C-Left
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,21 +1,19 @@
|
|||||||
{ config, pkgs, agenix, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/main.nix
|
../common/main.nix
|
||||||
|
|
||||||
|
../module/ceph.nix
|
||||||
./gitlab-runner.nix
|
./gitlab-runner.nix
|
||||||
./monitoring.nix
|
./monitoring.nix
|
||||||
./nfs.nix
|
./nfs.nix
|
||||||
./slurm-daemon.nix
|
./slurm-daemon.nix
|
||||||
./ceph.nix
|
./nix-serve.nix
|
||||||
#./pxe.nix
|
#./pxe.nix
|
||||||
agenix.nixosModules.default
|
|
||||||
];
|
];
|
||||||
|
|
||||||
environment.systemPackages = [
|
boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
|
||||||
agenix.packages.x86_64-linux.default
|
|
||||||
];
|
|
||||||
|
|
||||||
# Select the disk using the ID to avoid mismatches
|
# Select the disk using the ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
|
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
|
||||||
|
|||||||
@@ -1,33 +1,40 @@
|
|||||||
{ pkgs, lib, config, ... }:
|
{ pkgs, lib, config, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age;
|
age.secrets.ovniToken.file = ../../secrets/ovni-token.age;
|
||||||
age.secrets."secrets/nosv-token".file = ./secrets/nosv-token.age;
|
age.secrets.nosvToken.file = ../../secrets/nosv-token.age;
|
||||||
|
|
||||||
services.gitlab-runner = {
|
services.gitlab-runner = {
|
||||||
enable = true;
|
enable = true;
|
||||||
settings.concurrent = 5;
|
settings.concurrent = 5;
|
||||||
services = {
|
services = {
|
||||||
ovni-shell = {
|
ovni-shell = {
|
||||||
registrationConfigFile = config.age.secrets."secrets/ovni-token".path;
|
registrationConfigFile = config.age.secrets.ovniToken.path;
|
||||||
executor = "shell";
|
executor = "shell";
|
||||||
tagList = [ "nix" "xeon" ];
|
tagList = [ "nix" "xeon" ];
|
||||||
|
registrationFlags = [
|
||||||
|
# Using space doesn't work, and causes it to misread the next flag
|
||||||
|
"--locked='false'"
|
||||||
|
];
|
||||||
environmentVariables = {
|
environmentVariables = {
|
||||||
SHELL = "${pkgs.bash}/bin/bash";
|
SHELL = "${pkgs.bash}/bin/bash";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
ovni-docker = {
|
ovni-docker = {
|
||||||
registrationConfigFile = config.age.secrets."secrets/ovni-token".path;
|
registrationConfigFile = config.age.secrets.ovniToken.path;
|
||||||
dockerImage = "debian:stable";
|
dockerImage = "debian:stable";
|
||||||
tagList = [ "docker" "xeon" ];
|
tagList = [ "docker" "xeon" ];
|
||||||
registrationFlags = [ "--docker-network-mode host" ];
|
registrationFlags = [
|
||||||
|
"--locked='false'"
|
||||||
|
"--docker-network-mode host"
|
||||||
|
];
|
||||||
environmentVariables = {
|
environmentVariables = {
|
||||||
https_proxy = "http://localhost:23080";
|
https_proxy = "http://localhost:23080";
|
||||||
http_proxy = "http://localhost:23080";
|
http_proxy = "http://localhost:23080";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
nosv-docker = {
|
nosv-docker = {
|
||||||
registrationConfigFile = config.age.secrets."secrets/nosv-token".path;
|
registrationConfigFile = config.age.secrets.nosvToken.path;
|
||||||
dockerImage = "debian:stable";
|
dockerImage = "debian:stable";
|
||||||
tagList = [ "docker" "xeon" ];
|
tagList = [ "docker" "xeon" ];
|
||||||
registrationFlags = [
|
registrationFlags = [
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
enable = true;
|
enable = true;
|
||||||
port = 9001;
|
port = 9001;
|
||||||
retentionTime = "1y";
|
retentionTime = "1y";
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
|
systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
|
||||||
@@ -48,13 +49,18 @@
|
|||||||
user = "root";
|
user = "root";
|
||||||
configFile = ./ipmi.yml;
|
configFile = ./ipmi.yml;
|
||||||
#extraFlags = [ "--log.level=debug" ];
|
#extraFlags = [ "--log.level=debug" ];
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
};
|
};
|
||||||
node = {
|
node = {
|
||||||
enable = true;
|
enable = true;
|
||||||
enabledCollectors = [ "systemd" ];
|
enabledCollectors = [ "systemd" ];
|
||||||
port = 9002;
|
port = 9002;
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
|
};
|
||||||
|
smartctl = {
|
||||||
|
enable = true;
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
};
|
};
|
||||||
smartctl.enable = true;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
|
|||||||
16
m/hut/nix-serve.nix
Normal file
16
m/hut/nix-serve.nix
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{ config, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
age.secrets.nixServe.file = ../../secrets/nix-serve.age;
|
||||||
|
|
||||||
|
services.nix-serve = {
|
||||||
|
enable = true;
|
||||||
|
# Only listen locally, as we serve it via ssh
|
||||||
|
bindAddress = "127.0.0.1";
|
||||||
|
port = 5000;
|
||||||
|
|
||||||
|
secretKeyFile = config.age.secrets.nixServe.path;
|
||||||
|
# Public key:
|
||||||
|
# jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
let
|
|
||||||
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
|
||||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb";
|
|
||||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1";
|
|
||||||
default = [ rarias root hut ];
|
|
||||||
in
|
|
||||||
{
|
|
||||||
"secrets/ovni-token.age".publicKeys = default;
|
|
||||||
"secrets/nosv-token.age".publicKeys = default;
|
|
||||||
"secrets/ceph-user.age".publicKeys = default;
|
|
||||||
}
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
age-encryption.org/v1
|
|
||||||
-> ssh-ed25519 CAWG4Q 35Ak+Mep9k5KnDLF1ywDbMD4l4mRFg6D0et19tqXxAw
|
|
||||||
Wgr+CX4rzrPmUszSidtLAVSvgD80F2dqtd92hGZIFwo
|
|
||||||
-> ssh-ed25519 MSF3dg OVFvpkAyWTowtxsafstX31H/hJpNZmnOCbvqMIN0+AQ
|
|
||||||
VxjRcQmp+BadEh2y0PB96EeizIl3tTQpVu0CWHmsc1s
|
|
||||||
-> ssh-ed25519 HY2yRg MJSQIpre9m0XnojgXuKQ/+hVBZNrZNGZqplwhqicpjI
|
|
||||||
CLkE52iqpoqSnbzisNjQgxTfNqKeaRl5ntcw1d+ZDyQ
|
|
||||||
-> m$8`De%~-grease '85p}`by
|
|
||||||
52zMpprONcawWDDtzHdWNwFoYXErPUnVjhSONbUBpDlqAmJmD1LcAnsU
|
|
||||||
--- 0vZOPyXQIMMGTwgFfvm8Sn8O7vjrsjGUEy5m/BASCyc
|
|
||||||
<EFBFBD>|<04><><EFBFBD>)<29><><EFBFBD><EFBFBD><EFBFBD>*_<>D<EFBFBD>US`<06><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>r <20>s<EFBFBD><73>N<EFBFBD><4E>[^e+A1<><31>G.<2E>#<23><><EFBFBD>m<EFBFBD><6D>W<57> <20>5<0C><><EFBFBD><EFBFBD>(
|
|
||||||
Binary file not shown.
Binary file not shown.
@@ -3,9 +3,5 @@
|
|||||||
{
|
{
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
server.enable = true;
|
server.enable = true;
|
||||||
partitionName = [
|
|
||||||
"owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP"
|
|
||||||
"all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP"
|
|
||||||
];
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,5 +7,9 @@
|
|||||||
- 10.0.40.106
|
- 10.0.40.106
|
||||||
- 10.0.40.107
|
- 10.0.40.107
|
||||||
- 10.0.40.108
|
- 10.0.40.108
|
||||||
|
# Storage
|
||||||
|
- 10.0.40.141
|
||||||
|
- 10.0.40.142
|
||||||
|
- 10.0.40.143
|
||||||
labels:
|
labels:
|
||||||
job: ipmi-lan
|
job: ipmi-lan
|
||||||
|
|||||||
@@ -50,4 +50,24 @@
|
|||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Missing service for volumes, see:
|
||||||
|
# https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/
|
||||||
|
systemd.services.ceph-volume = {
|
||||||
|
enable = true;
|
||||||
|
description = "Ceph Volume activation";
|
||||||
|
unitConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
After = "local-fs.target";
|
||||||
|
Wants = "local-fs.target";
|
||||||
|
};
|
||||||
|
path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ];
|
||||||
|
serviceConfig = {
|
||||||
|
KillMode = "none";
|
||||||
|
Environment = "CEPH_VOLUME_TIMEOUT=10000";
|
||||||
|
ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'";
|
||||||
|
TimeoutSec = "0";
|
||||||
|
};
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
|
# Mounts the /ceph filesystem at boot
|
||||||
{
|
{
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
ceph
|
ceph
|
||||||
@@ -11,14 +12,14 @@
|
|||||||
# modprobe command.
|
# modprobe command.
|
||||||
boot.kernelModules = [ "ceph" ];
|
boot.kernelModules = [ "ceph" ];
|
||||||
|
|
||||||
age.secrets."secrets/ceph-user".file = ./secrets/ceph-user.age;
|
age.secrets.cephUser.file = ../../secrets/ceph-user.age;
|
||||||
|
|
||||||
fileSystems."/ceph" = {
|
fileSystems."/ceph" = {
|
||||||
fsType = "ceph";
|
fsType = "ceph";
|
||||||
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/";
|
||||||
options = [
|
options = [
|
||||||
"mon_addr=10.0.40.40"
|
"mon_addr=10.0.40.40"
|
||||||
"secretfile=${config.age.secrets."secrets/ceph-user".path}"
|
"secretfile=${config.age.secrets.cephUser.path}"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
8
m/module/slurm-firewall.nix
Normal file
8
m/module/slurm-firewall.nix
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.firewall = {
|
||||||
|
# Required for PMIx in SLURM, we should find a better way
|
||||||
|
allowedTCPPortRanges = [ { from=1024; to=65535; } ];
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,7 +1,11 @@
|
|||||||
{ config, pkgs, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [ ../common/main.nix ];
|
imports = [
|
||||||
|
../common/main.nix
|
||||||
|
../module/ceph.nix
|
||||||
|
../module/slurm-firewall.nix
|
||||||
|
];
|
||||||
|
|
||||||
# Select the disk using the ID to avoid mismatches
|
# Select the disk using the ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c";
|
||||||
|
|||||||
@@ -1,15 +1,14 @@
|
|||||||
{ config, pkgs, modulesPath, lib, ... }:
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
|
||||||
../common/main.nix
|
../common/main.nix
|
||||||
|
../module/ceph.nix
|
||||||
|
../module/slurm-firewall.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Select the disk using the ID to avoid mismatches
|
# Select the disk using the ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629";
|
||||||
#programs.ssh.forwardX11 = false;
|
|
||||||
#programs.ssh.setXAuthLocation = lib.mkForce true;
|
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
hostName = "owl2";
|
hostName = "owl2";
|
||||||
@@ -17,6 +16,7 @@
|
|||||||
address = "10.0.40.2";
|
address = "10.0.40.2";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
} ];
|
} ];
|
||||||
|
# Watch out! The OmniPath device is not in the same place here:
|
||||||
interfaces.ibp129s0.ipv4.addresses = [ {
|
interfaces.ibp129s0.ipv4.addresses = [ {
|
||||||
address = "10.0.42.2";
|
address = "10.0.42.2";
|
||||||
prefixLength = 24;
|
prefixLength = 24;
|
||||||
|
|||||||
21
secrets/ceph-user.age
Normal file
21
secrets/ceph-user.age
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU
|
||||||
|
J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs
|
||||||
|
-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE
|
||||||
|
4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk
|
||||||
|
-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4
|
||||||
|
Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs
|
||||||
|
-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg
|
||||||
|
LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo
|
||||||
|
-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0
|
||||||
|
ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8
|
||||||
|
-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0
|
||||||
|
T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E
|
||||||
|
-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc
|
||||||
|
MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok
|
||||||
|
-> ~8m;7f-grease
|
||||||
|
lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/
|
||||||
|
G/X4EA
|
||||||
|
--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA
|
||||||
|
<EFBFBD>N <09>$[H<><48>Q<EFBFBD><51><EFBFBD>
|
||||||
|
d<EFBFBD><EFBFBD><EFBFBD>'<27><><EFBFBD>7<EFBFBD><1F>Ͳ)<29><><EFBFBD><17>x9y<39><79><EFBFBD>E<04><><EFBFBD>M7^<5E>[<5B>M<EFBFBD>+<2B>&<26><><EFBFBD><0E>$8tM<74>в
|
||||||
BIN
secrets/munge-key.age
Normal file
BIN
secrets/munge-key.age
Normal file
Binary file not shown.
12
secrets/nix-serve.age
Normal file
12
secrets/nix-serve.age
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 HY2yRg d144D+VvxhYgKtH//uD2qNuVnYX6bh74YqkyM3ZjBwU
|
||||||
|
0IeVmFAf4U8Sm0d01O6ZwJ1V2jl/mSMl4wF0MP5LrIg
|
||||||
|
-> ssh-ed25519 CAWG4Q H4nKxue/Cj/3KUF5A+/ygHMjjArwgx3SIWwXcqFtyUo
|
||||||
|
4k5NJkLUrueLYiPkr2LAwQLWmuaOIsDmV/86ravpleU
|
||||||
|
-> ssh-ed25519 MSF3dg HpgUAFHLPs4w0cdJHqTwf8lySkTeV9O9NnBf49ClDHs
|
||||||
|
foPIUUgAYe1YSDy6+aMfjN7xv9xud9fDmhRlIztHoEo
|
||||||
|
-> vLkF\<-grease
|
||||||
|
3GRT+W8gYSpjl/a6Ix9+g9UJnTpl1ZH/oucfR801vfE8y77DV2Jxz/XJwzxYxKG5
|
||||||
|
YEhiTGMNbXw/V7E5aVSz6Bdc
|
||||||
|
--- GtiHKCZdHByq9j0BSLd544PhbEwTN138E8TFdxipeiA
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>G$S<><53><EFBFBD>RA<52><41><EFBFBD>Th]n<>8<EFBFBD>,<2C>H<EFBFBD>s<EFBFBD><73><EFBFBD>=p<><70><EFBFBD>'<27><><EFBFBD>+<2B>j<><6A><EFBFBD><EFBFBD>9<EFBFBD>)<29>:<3A>)<15><><EFBFBD>Y<EFBFBD><59><EFBFBD>8<EFBFBD>I<EFBFBD><49>8:ol<6F><6C><EFBFBD><1F><><EFBFBD>Z<EFBFBD><5A>3<>PM<50>F;<3B>rY<72><59><EFBFBD><EFBFBD><1F>$<24><>y<EFBFBD>L<>ٜ<EFBFBD>Μ<1B><>U<EFBFBD>s16Ǿ<>L<EFBFBD>b<EFBFBD><62><EFBFBD>
|
||||||
11
secrets/nosv-token.age
Normal file
11
secrets/nosv-token.age
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 HY2yRg hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE
|
||||||
|
avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro
|
||||||
|
-> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA
|
||||||
|
FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70
|
||||||
|
-> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o
|
||||||
|
BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8
|
||||||
|
-> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t
|
||||||
|
oLG+0S1aGfO/ohCfgGmhDhwwLi4H
|
||||||
|
--- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI
|
||||||
|
<EFBFBD><EFBFBD>W<EFBFBD>o<> <14><>d;<3B><>C<EFBFBD>.<2E><>_(<28>u
|
||||||
BIN
secrets/ovni-token.age
Normal file
BIN
secrets/ovni-token.age
Normal file
Binary file not shown.
15
secrets/secrets.nix
Normal file
15
secrets/secrets.nix
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
let
|
||||||
|
keys = import ../keys.nix;
|
||||||
|
adminsKeys = builtins.attrValues keys.admins;
|
||||||
|
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
||||||
|
# Only expose ceph keys to safe nodes and admins
|
||||||
|
safe = keys.hostGroup.safe ++ adminsKeys;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
"ovni-token.age".publicKeys = hut;
|
||||||
|
"nosv-token.age".publicKeys = hut;
|
||||||
|
"nix-serve.age".publicKeys = hut;
|
||||||
|
|
||||||
|
"ceph-user.age".publicKeys = safe;
|
||||||
|
"munge-key.age".publicKeys = safe;
|
||||||
|
}
|
||||||
@@ -17,6 +17,6 @@ Then, to request access to the machines we will need some information about you:
|
|||||||
1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
|
1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
|
||||||
1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`)
|
1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`)
|
||||||
|
|
||||||
You can send us both an email at <rodrigo.arias@bsc.es> and
|
Send an email to <jungle@bsc.es> with the details, or directly open a
|
||||||
<aleix.rocanonell@bsc.es> with the details, or directly open a merge request in
|
merge request in the [jungle
|
||||||
the [jungle repository](https://pm.bsc.es/gitlab/rarias/jungle/).
|
repository](https://pm.bsc.es/gitlab/rarias/jungle/).
|
||||||
|
|||||||
7
web/content/intro-nix/_index.md
Normal file
7
web/content/intro-nix/_index.md
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
---
|
||||||
|
title: "Intro to nix"
|
||||||
|
date: 2023-09-15
|
||||||
|
---
|
||||||
|
|
||||||
|
Basic introduction to Nix for users of the jungle machines. You should be able
|
||||||
|
to access the jungle machines, otherwise [request access](/access).
|
||||||
100
web/content/intro-nix/ch1.md
Normal file
100
web/content/intro-nix/ch1.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
---
|
||||||
|
title: "Chapter 1: Packages"
|
||||||
|
description: "Here we show where packages come from"
|
||||||
|
date: 2023-06-13T19:36:57+02:00
|
||||||
|
weight: 1
|
||||||
|
---
|
||||||
|
|
||||||
|
In this chapter we describe where the packages available in the cluster come
|
||||||
|
from and how to load them.
|
||||||
|
|
||||||
|
## Where packages come from
|
||||||
|
|
||||||
|
The packages in the jungle cluster are constructed by *layers*. Each layer
|
||||||
|
applies some changes over the previous one:
|
||||||
|
|
||||||
|
1. The first layer is [nixpkgs][1], a large repository of packages maintained by
|
||||||
|
the NixOS community. It provides packages like gcc, bash or the Linux
|
||||||
|
kernel.
|
||||||
|
|
||||||
|
[1]: https://github.com/NixOS/nixpkgs/
|
||||||
|
|
||||||
|
2. The second layer is [bscpkgs][2], it takes the nixpkgs set of packages and
|
||||||
|
expands it by adding custom packages from the BSC such as Nanos6, nOS-V,
|
||||||
|
NODES, ovni or wxparaver.
|
||||||
|
|
||||||
|
[2]: https://pm.bsc.es/gitlab/rarias/bscpkgs
|
||||||
|
|
||||||
|
3. The third layer is [jungle][3], it takes the extended packages from bscpkgs
|
||||||
|
and configures them for the jungle cluster. For example, we configure MPICH
|
||||||
|
to use the OmniPath network and set it as the default implementation.
|
||||||
|
|
||||||
|
[3]: https://pm.bsc.es/gitlab/rarias/jungle
|
||||||
|
|
||||||
|
These layers are called *overlays* in Nix and they are the default mechanism
|
||||||
|
used to modify the packages. Generally you will use the packages defined in the
|
||||||
|
last layer (jungle) but you can define your own additional layer to specify
|
||||||
|
custom changes. For example, instead of choosing MPICH, you may want to use
|
||||||
|
Intel MPI instead by default.
|
||||||
|
|
||||||
|
## Loading packages in an ephemeral shell
|
||||||
|
|
||||||
|
You can manually load packages in a *new* shell with `nix shell jungle#<pkg>`,
|
||||||
|
for example:
|
||||||
|
|
||||||
|
```
|
||||||
|
hut% which ovniemu
|
||||||
|
ovniemu not found
|
||||||
|
hut% nix shell jungle#bsc.ovni
|
||||||
|
hut% which ovniemu
|
||||||
|
/nix/store/0yzas8007x9djlpbb0pckcr1vhd0mcfy-ovni-1.3.0/bin/ovniemu
|
||||||
|
hut% exit
|
||||||
|
hut%
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also specify multiple packages by listing them as parameters of `nix
|
||||||
|
shell`:
|
||||||
|
|
||||||
|
```
|
||||||
|
hut% nix shell jungle#bsc.ovni jungle#bsc.osumb
|
||||||
|
hut% which osu_bw
|
||||||
|
/nix/store/lnjirzllhjn2fadlqzrz7a547iawl8jc-osu-micro-benchmarks-7.1-1/bin/osu_bw
|
||||||
|
hut% exit
|
||||||
|
```
|
||||||
|
|
||||||
|
Or let the shell (zsh in this case) expand them:
|
||||||
|
|
||||||
|
```
|
||||||
|
hut% echo nix shell jungle#bsc.{ovni,osumb}
|
||||||
|
nix shell jungle#bsc.ovni jungle#bsc.osumb
|
||||||
|
hut% nix shell jungle#bsc.{ovni,osumb}
|
||||||
|
hut% which osu_bw
|
||||||
|
/nix/store/lnjirzllhjn2fadlqzrz7a547iawl8jc-osu-micro-benchmarks-7.1-1/bin/osu_bw
|
||||||
|
hut% exit
|
||||||
|
```
|
||||||
|
|
||||||
|
You can use TAB to see which packages are available:
|
||||||
|
|
||||||
|
```
|
||||||
|
hut% nix shell jungle#bsc.n<TAB>
|
||||||
|
jungle\#bsc.nanos6 jungle\#bsc.nixtools
|
||||||
|
jungle\#bsc.nanos6Debug jungle\#bsc.nix-wrap
|
||||||
|
jungle\#bsc.nanos6Git jungle\#bsc.nodes
|
||||||
|
jungle\#bsc.nanos6GlibcxxDebug jungle\#bsc.nodesGit
|
||||||
|
jungle\#bsc.nanos6-icc jungle\#bsc.nodesRelease
|
||||||
|
jungle\#bsc.nanos6-icx jungle\#bsc.nodesWithOvni
|
||||||
|
jungle\#bsc.nanos6Release jungle\#bsc.nosv
|
||||||
|
jungle\#bsc.nix-mn4
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that these packages are evaluated at the moment the command is invoked.
|
||||||
|
So if you come back a month later and run the same command, you may find that
|
||||||
|
the packages have been updated and that could be problematic.
|
||||||
|
|
||||||
|
In the next section we will create a new flake that defines the packages of the
|
||||||
|
shell and also records the exact version of the packages that we used at the
|
||||||
|
evaluation time for future use.
|
||||||
|
|
||||||
|
In the [next chapter](../ch2) we will see how to create a permanent shell that
|
||||||
|
will retain the same packages even if they are upgraded in the cluster, until we
|
||||||
|
decide to upgrade them.
|
||||||
155
web/content/intro-nix/ch2.md
Normal file
155
web/content/intro-nix/ch2.md
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
---
|
||||||
|
title: "Chapter 2: Your first shell"
|
||||||
|
date: 2023-09-15
|
||||||
|
weight: 2
|
||||||
|
---
|
||||||
|
|
||||||
|
## Creating a shell with flake.nix
|
||||||
|
|
||||||
|
First, create an empty git repository where your shells will live:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% mkdir jungle-examples
|
||||||
|
hut% cd jungle-examples
|
||||||
|
hut% git init
|
||||||
|
Initialized empty Git repository in /home/Computational/rarias/jungle-examples/.git/
|
||||||
|
```
|
||||||
|
|
||||||
|
And then, place a file named `flake.nix` on the repo with this content:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{
|
||||||
|
inputs.jungle.url = "jungle";
|
||||||
|
nixConfig.bash-prompt = "\[nix-develop\]$ ";
|
||||||
|
|
||||||
|
outputs = { self, jungle }:
|
||||||
|
let
|
||||||
|
pkgs = jungle.outputs.packages.x86_64-linux;
|
||||||
|
in {
|
||||||
|
devShells.x86_64-linux.default = pkgs.mkShell rec {
|
||||||
|
pname = "my-shell";
|
||||||
|
buildInputs = with pkgs.bsc; [
|
||||||
|
ovni osumb # other packages here...
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This file defines how to create a shell in the Nix language with the
|
||||||
|
`pkgs.mkShell` function using the packages listed in `buildInputs`. It also
|
||||||
|
requests the packages to be taken from the *jungle* input, which corresponds to
|
||||||
|
the set of packages that [we defined earlier](../ch1#where-packages-come-from),
|
||||||
|
tuned for the cluster. We will describe it in more detail later.
|
||||||
|
|
||||||
|
The tool `nix develop` tries to find a flake.nix in the current directory and
|
||||||
|
enter the shell described by `devShells.x86_64-linux.default` (or the
|
||||||
|
corresponding architecture).
|
||||||
|
|
||||||
|
Now, **it is important that all the files of the repository are committed in
|
||||||
|
git**, as nix will only read what is in the index of git. If we try to enter the
|
||||||
|
shell with the `nix develop` command, it will complain and fail:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix develop
|
||||||
|
warning: Git tree '/home/Computational/rarias/jungle-examples' is dirty
|
||||||
|
error: getting status of '/nix/store/0ccnxa25whszw7mgbgyzdm4nqc0zwnm8-source/flake.nix': No such file or directory
|
||||||
|
```
|
||||||
|
|
||||||
|
The first warning states that the git directory has modified files not added to
|
||||||
|
the index. Then the error occurs because the flake.nix is not in the index of
|
||||||
|
git, so `nix develop` doesn't see it. So let's add it to a commit and try again:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% git add flake.nix
|
||||||
|
hut% git commit flake.nix -m 'First shell'
|
||||||
|
[master (root-commit) eb8a4ac] First shell
|
||||||
|
1 file changed, 13 insertions(+)
|
||||||
|
create mode 100644 flake.nix
|
||||||
|
hut% nix develop
|
||||||
|
warning: creating lock file '/home/Computational/rarias/jungle-examples/flake.lock'
|
||||||
|
warning: Git tree '/home/Computational/rarias/jungle-examples' is dirty
|
||||||
|
[nix-develop]$
|
||||||
|
```
|
||||||
|
|
||||||
|
In the `flake.nix` we have set the shell prompt to `[nix-develop]` so we can
|
||||||
|
easily spot that we are inside a `nix develop` shell. To exit:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
[nix-develop]$ exit
|
||||||
|
hut%
|
||||||
|
```
|
||||||
|
|
||||||
|
## Using the flake.lock file
|
||||||
|
|
||||||
|
Now we see the `creating lock file` message and the git tree becomes dirty
|
||||||
|
again (however, we enter the shell successfully).
|
||||||
|
|
||||||
|
This `flake.lock` file that has been created collects the current state of the
|
||||||
|
jungle packages in a file, so future invocations will use the same versions. We
|
||||||
|
can see more details with `nix flake metadata`:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix flake metadata
|
||||||
|
warning: Git tree '/home/Computational/rarias/jungle-examples' is dirty
|
||||||
|
Resolved URL: git+file:///home/Computational/rarias/jungle-examples
|
||||||
|
Locked URL: git+file:///home/Computational/rarias/jungle-examples
|
||||||
|
Path: /nix/store/bckxqjkkv52hy4pzgb96r7fchhmvmql8-source
|
||||||
|
Revision: eb8a4ac544a74e3995d859c751e9ff4339de6509-dirty
|
||||||
|
Last modified: 2023-09-15 13:06:12
|
||||||
|
Inputs:
|
||||||
|
└───jungle: path:/nix/store/3wv6q0f3pkgw840nnkn4jsp9xi650dyj-source?lastModified=1694772033&narHash=sha256-7a09O0Jb8WncxeB32ywmQEMqJdEFLrOG/XVT9bdII6I%3D&rev=653d411b9e46076a7878be9574ed6b3bd627cff1&revCount=195
|
||||||
|
├───agenix: github:ryantm/agenix/d8c973fd228949736dedf61b7f8cc1ece3236792
|
||||||
|
│ ├───darwin: github:lnl7/nix-darwin/87b9d090ad39b25b2400029c64825fc2a8868943
|
||||||
|
│ │ └───nixpkgs follows input 'jungle/agenix/nixpkgs'
|
||||||
|
│ ├───home-manager: github:nix-community/home-manager/32d3e39c491e2f91152c84f8ad8b003420eab0a1
|
||||||
|
│ │ └───nixpkgs follows input 'jungle/agenix/nixpkgs'
|
||||||
|
│ └───nixpkgs follows input 'jungle/nixpkgs'
|
||||||
|
├───bscpkgs: git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git?ref=refs/heads/master&rev=3a4062ac04be6263c64a481420d8e768c2521b80
|
||||||
|
│ └───nixpkgs follows input 'jungle/nixpkgs'
|
||||||
|
└───nixpkgs: github:NixOS/nixpkgs/e56990880811a451abd32515698c712788be5720
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, as long as we keep these two files `flake.nix` and `flake.lock`, we can
|
||||||
|
reproduce the same shell in the future, so let's add the lock file into git too.
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% git commit -m 'Add flake.lock file'
|
||||||
|
[master d3725ec] Add flake.lock file
|
||||||
|
1 file changed, 135 insertions(+)
|
||||||
|
create mode 100644 flake.lock
|
||||||
|
hut% git status
|
||||||
|
On branch master
|
||||||
|
nothing to commit, working tree clean
|
||||||
|
```
|
||||||
|
|
||||||
|
## Using the shell with nix develop
|
||||||
|
|
||||||
|
Now, the invocations of `nix develop` won't complain that the git tree is dirty
|
||||||
|
anymore and will enter the shell:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix develop
|
||||||
|
[nix-develop]$
|
||||||
|
```
|
||||||
|
|
||||||
|
And the requested packages are now available:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
[nix-develop]$ which ovniemu
|
||||||
|
/nix/store/0yzas8007x9djlpbb0pckcr1vhd0mcfy-ovni-1.3.0/bin/ovniemu
|
||||||
|
```
|
||||||
|
|
||||||
|
The packages of the shell are listed in the `$buildInputs` variable, in case you
|
||||||
|
need to examine them:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
[nix-develop]$ printf '%s\n' $buildInputs
|
||||||
|
/nix/store/0yzas8007x9djlpbb0pckcr1vhd0mcfy-ovni-1.3.0
|
||||||
|
/nix/store/lnjirzllhjn2fadlqzrz7a547iawl8jc-osu-micro-benchmarks-7.1-1
|
||||||
|
[nix-develop]$ exit
|
||||||
|
hut%
|
||||||
|
```
|
||||||
|
|
||||||
|
In the [next chapter](../ch3) we will see how to add more packages and also how to modify
|
||||||
|
them.
|
||||||
160
web/content/intro-nix/ch3.md
Normal file
160
web/content/intro-nix/ch3.md
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
---
|
||||||
|
title: "Chapter 3: Custom packages"
|
||||||
|
date: 2023-09-15
|
||||||
|
weight: 3
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding more packages
|
||||||
|
|
||||||
|
So far we have defined all the packages using:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
pkgs.mkShell rec {
|
||||||
|
pname = "my-shell";
|
||||||
|
buildInputs = with pkgs.bsc; [
|
||||||
|
ovni osumb # other packages here...
|
||||||
|
];
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
This line specifies that all packages come from the `pkgs.bsc` set. We can add
|
||||||
|
additional packages by adding them to the list:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
pkgs.mkShell rec {
|
||||||
|
pname = "my-shell";
|
||||||
|
buildInputs = with pkgs.bsc; [
|
||||||
|
ovni osumb sonar
|
||||||
|
];
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
And running `nix develop` again:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix develop
|
||||||
|
warning: Git tree '/home/Computational/rarias/jungle-examples' is dirty
|
||||||
|
[nix-develop]$ printf '%s\n' $buildInputs
|
||||||
|
/nix/store/0yzas8007x9djlpbb0pckcr1vhd0mcfy-ovni-1.3.0
|
||||||
|
/nix/store/lnjirzllhjn2fadlqzrz7a547iawl8jc-osu-micro-benchmarks-7.1-1
|
||||||
|
/nix/store/fjxj4xs0wblw3jyhp4vsrsfnlfwawifa-sonar-0.1.0
|
||||||
|
```
|
||||||
|
|
||||||
|
In the jungle cluster, the default MPI implementation is currently set to MPICH,
|
||||||
|
as can be shown with ldd:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
[nix-develop]$ ldd $(which ovnisync) | grep mpi
|
||||||
|
libmpi.so.12 => /nix/store/nnnaly6hgylravdrmqkhpx1ndg5p79nc-mpich-4.1.2/lib/libmpi.so.12 (0x00007ffff5200000)
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, what if we want to replace the MPI implementation with another one?
|
||||||
|
|
||||||
|
## Modifying a package
|
||||||
|
|
||||||
|
You notice that the packages we are using are coming directly from the ones
|
||||||
|
specified in jungle. However, what if we need to modify some option at build
|
||||||
|
time or change a dependency?
|
||||||
|
|
||||||
|
The Nix language is used to describe how to build each package, and can be
|
||||||
|
extended to create derived versions very easily.
|
||||||
|
|
||||||
|
Let's focus on the `ovni` package. First, to load the definition we can use the
|
||||||
|
`nix edit` command, which opens the definition file using the editor defined in
|
||||||
|
`$EDITOR`:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix edit jungle#bsc.ovni
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
This particular package has several inputs that can be modified directly:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
{
|
||||||
|
stdenv
|
||||||
|
, lib
|
||||||
|
, cmake
|
||||||
|
, mpi
|
||||||
|
, fetchFromGitHub
|
||||||
|
, useGit ? false
|
||||||
|
, gitBranch ? "master"
|
||||||
|
, gitUrl ? "ssh://git@bscpm03.bsc.es/rarias/ovni.git"
|
||||||
|
, gitCommit ? "d0a47783f20f8b177a48418966dae45454193a6a"
|
||||||
|
, enableDebug ? false
|
||||||
|
}:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, the `enableDebug` flag, currently set to false, affects how the
|
||||||
|
build is configured:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
cmakeBuildType = if (enableDebug) then "Debug" else "Release";
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, to change this option we could replace `ovni` with our version:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{
|
||||||
|
inputs.jungle.url = "jungle";
|
||||||
|
nixConfig.bash-prompt = "\[nix-develop\]$ ";
|
||||||
|
|
||||||
|
outputs = { self, jungle }:
|
||||||
|
let
|
||||||
|
pkgs = jungle.outputs.packages.x86_64-linux;
|
||||||
|
ovniDebug = pkgs.bsc.ovni.override { enableDebug = true; };
|
||||||
|
in {
|
||||||
|
devShells.x86_64-linux.default = pkgs.mkShell rec {
|
||||||
|
pname = "my-shell";
|
||||||
|
buildInputs = with pkgs.bsc; [
|
||||||
|
ovniDebug osumb sonar
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
And then, when we now enter the develop shell we can see that ovni gets built
|
||||||
|
with the Debug option:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% nix develop -L
|
||||||
|
warning: Git tree '/home/Computational/rarias/jungle-examples' is dirty
|
||||||
|
ovni> unpacking sources
|
||||||
|
ovni> unpacking source archive /nix/store/cz4si0vsw85r9s6dyiqr5ybngh9aympi-source
|
||||||
|
ovni> source root is source
|
||||||
|
ovni> patching sources
|
||||||
|
ovni> updateAutotoolsGnuConfigScriptsPhase
|
||||||
|
ovni> configuring
|
||||||
|
ovni> fixing cmake files...
|
||||||
|
ovni> cmake flags: ... -DCMAKE_BUILD_TYPE=Debug ...
|
||||||
|
...
|
||||||
|
[nix-develop]$ which ovniver
|
||||||
|
/nix/store/hg0xs7fpibwjhsp9ajqfcbffsh69mrsm-ovni-1.3.0/bin/ovniver
|
||||||
|
|
||||||
|
[nix-develop]$ file $(which ovniver) | fold
|
||||||
|
/nix/store/hg0xs7fpibwjhsp9ajqfcbffsh69mrsm-ovni-1.3.0/bin/ovniver: ELF 64-bit L
|
||||||
|
SB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /nix/st
|
||||||
|
ore/9la894yvmmksqlapd4v16wvxpaw3rg70-glibc-2.37-8/lib/ld-linux-x86-64.so.2, for
|
||||||
|
GNU/Linux 3.10.0, with debug_info, not stripped
|
||||||
|
```
|
||||||
|
|
||||||
|
And we see that the ovniver program is now compiled with debug symbols.
|
||||||
|
|
||||||
|
However, this *only* replaces the ovni package that we specify in the shell. The
|
||||||
|
sonar library also depends on ovni, but that package is still using the old one:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
[nix-develop]$ find $buildInputs -name 'libovni.so.1'
|
||||||
|
/nix/store/hg0xs7fpibwjhsp9ajqfcbffsh69mrsm-ovni-1.3.0/lib/libovni.so.1
|
||||||
|
|
||||||
|
[nix-develop]$ find $buildInputs -name 'libsonar-mpi.so'
|
||||||
|
/nix/store/fjxj4xs0wblw3jyhp4vsrsfnlfwawifa-sonar-0.1.0/lib/libsonar-mpi.so
|
||||||
|
|
||||||
|
[nix-develop]$ ldd /nix/store/fjxj4xs0wblw3jyhp4vsrsfnlfwawifa-sonar-0.1.0/lib/libsonar-mpi.so | grep ovni
|
||||||
|
libovni.so.1 => /nix/store/0yzas8007x9djlpbb0pckcr1vhd0mcfy-ovni-1.3.0/lib/libovni.so.1 (0x00007ffff7f8d000)
|
||||||
|
```
|
||||||
|
|
||||||
|
In the [next chapter](../ch4) we will see how to replace packages in such a way
|
||||||
|
that all the dependencies are automatically updated too.
|
||||||
29
web/content/intro-nix/ch4.md
Normal file
29
web/content/intro-nix/ch4.md
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
---
|
||||||
|
title: "Chapter 4: Adding an overlay"
|
||||||
|
date: 2023-09-15
|
||||||
|
weight: 4
|
||||||
|
---
|
||||||
|
|
||||||
|
NOTE: We shouldn't be instructing users to use an overlay to replace packages in
|
||||||
|
`bsc.` until we have determined if we move them to the root attribute set
|
||||||
|
first!
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{
|
||||||
|
inputs.jungle.url = "jungle";
|
||||||
|
nixConfig.bash-prompt = "\[nix-develop\]$ ";
|
||||||
|
|
||||||
|
outputs = { self, jungle }:
|
||||||
|
let
|
||||||
|
pkgs = jungle.outputs.packages.x86_64-linux;
|
||||||
|
ovniDebug = pkgs.bsc.ovni.override { enableDebug = true; };
|
||||||
|
in {
|
||||||
|
devShells.x86_64-linux.default = pkgs.mkShell rec {
|
||||||
|
pname = "my-shell";
|
||||||
|
buildInputs = with pkgs.bsc; [
|
||||||
|
ovniDebug osumb sonar
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
```
|
||||||
71
web/content/posts/2023-09-12/_index.md
Normal file
71
web/content/posts/2023-09-12/_index.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
---
|
||||||
|
title: "Update 2023-09-12"
|
||||||
|
author: "Rodrigo Arias Mallo"
|
||||||
|
date: 2023-09-12
|
||||||
|
---
|
||||||
|
|
||||||
|
This is a summary of notable changes introduced in the jungle cluster in the
|
||||||
|
last months.
|
||||||
|
|
||||||
|
### New Ceph filesystem available
|
||||||
|
|
||||||
|
We have installed the latest [Ceph filesystem][1] (18.2.0) which stores three
|
||||||
|
redundant copies of the data so a failure in one disk doesn't cause data loss.
|
||||||
|
It is mounted in /ceph and available for use in the owl1, owl2 and hut
|
||||||
|
nodes. For now it provides 2.8 TiB of space and it is expected to
|
||||||
|
increase when the last storage node is installed.
|
||||||
|
|
||||||
|
[1]: https://en.wikipedia.org/wiki/Ceph_(software)
|
||||||
|
|
||||||
|
The throughput is limited by the 1 Gigabit Ethernet speed, but should be
|
||||||
|
reasonably fast for most workloads. Here is a test with dd which reaches the
|
||||||
|
network limit:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% dd if=/dev/urandom of=/ceph/rarias/urandom bs=1M count=1024
|
||||||
|
1024+0 records in
|
||||||
|
1024+0 records out
|
||||||
|
1073741824 bytes (1,1 GB, 1,0 GiB) copied, 8,98544 s, 119 MB/s
|
||||||
|
```
|
||||||
|
|
||||||
|
### SLURM power save
|
||||||
|
|
||||||
|
The SLURM daemon has been configured to power down the nodes after one hour of
|
||||||
|
idling. When a new job is allocated to a node that is powered off, it is
|
||||||
|
automatically turned on and as soon as it becomes available it will execute the
|
||||||
|
job. Here is an example with two nodes that boot and execute a simple job that
|
||||||
|
shows the date.
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% date; srun -N 2 date
|
||||||
|
2023-09-12T17:36:09 CEST
|
||||||
|
2023-09-12T17:38:26 CEST
|
||||||
|
2023-09-12T17:38:18 CEST
|
||||||
|
```
|
||||||
|
|
||||||
|
You can expect a similar delay (around 2-3 min) while the nodes are starting.
|
||||||
|
Notice that while the nodes are kept on, the delay is not noticeable:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
hut% date; srun -N 2 date
|
||||||
|
2023-09-12T17:40:04 CEST
|
||||||
|
2023-09-12T17:40:04 CEST
|
||||||
|
2023-09-12T17:40:04 CEST
|
||||||
|
```
|
||||||
|
|
||||||
|
### Power and temperature monitoring
|
||||||
|
|
||||||
|
In the cluster, we monitor the temperature and the power draw of all nodes. This
|
||||||
|
allows us to understand which machines are not being used and turn them off to
|
||||||
|
save energy that otherwise would be wasted. Here is an example where some nodes
|
||||||
|
are powered off to save energy:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
We also configured the nodes to work at low CPU frequencies, so the temperature
|
||||||
|
is kept low to increase the lifespan of the node components. Towards these
|
||||||
|
goals, we have configured two alerts that trigger when the CPUs of a node
|
||||||
|
exceed the limit temperature of 80 °C or when the power draw exceeds 350 W.
|
||||||
|
|
||||||
|
By keeping the power consumption and temperatures controlled, we can safely
|
||||||
|
incorporate more machines that will only be used on demand.
|
||||||
BIN
web/content/posts/2023-09-12/power.png
Normal file
BIN
web/content/posts/2023-09-12/power.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 58 KiB |
@@ -3,3 +3,6 @@ languageCode = 'en-us'
|
|||||||
title = 'The jungle'
|
title = 'The jungle'
|
||||||
theme = 'PaperMod'
|
theme = 'PaperMod'
|
||||||
sectionPagesMenu = "main"
|
sectionPagesMenu = "main"
|
||||||
|
|
||||||
|
[params]
|
||||||
|
ShowBreadCrumbs = true
|
||||||
|
|||||||
Reference in New Issue
Block a user