Compare commits

..

10 Commits

Author SHA1 Message Date
4a52970821 Add 10 min shutdown jitter to avoid spikes
The shutdown timer will fire at slightly different times for the
different nodes, so we slowly decrease the power consumption.
2024-07-22 11:20:02 +02:00
7b58d8fbcc Don't mount the nix store in owl nodes
Initially we planned to run jobs in those nodes by sharing the same nix
store from hut. However, these nodes are now used to build packages
which are not available in hut. Users also ssh to the nodes, which
doesn't mount the hut store, so it doesn't make much sense to keep
mounting it.
2024-07-22 11:02:32 +02:00
f3167c0cc0 Emulate other architectures in owl nodes too
Allows cross-compilation of packages for RISC-V that are known to try to
run RISC-V programs in the host.
2024-07-19 17:55:12 +02:00
d3489f8e48 Program shutdown for August 2nd for all machines 2024-07-18 18:07:12 +02:00
86f5bea6c7 Enable debuginfod daemon in owl nodes
WARNING: This will introduce noise, as the daemon wakes up from time to
time to check for new packages.
2024-07-18 16:12:39 +02:00
accb656c5e Set gitea and grafana log level to warn
Prevents filling the journal logs with information messages.
2024-07-18 15:39:02 +02:00
5e6cf2b563 Set default SLURM job time limit to one hour
Prevents enless jobs from being left forever, while allow users to
request a larger time limit.
2024-07-18 11:44:01 +02:00
29110d2d54 Allow other jobs to run in unused cores
The current select mechanism was using the memory too as a consumable
resource, which by default only sets 1 MiB per node. As each job already
requests 1 MiB, it prevents other jobs from running.

As we are not really concerned with memory usage, we only use the unused
cores in the select criteria.
2024-07-18 11:19:03 +02:00
32c919d1fc Use authentication tokens for PM GitLab runner
Starting with GitLab 16, there is a new mechanism to authenticate the
runners via authentication tokens, so use it instead.  Older tokens and
runners are also removed, as they are no longer used.

With the new way of managing tokens, both the tags and the locked state
are managed from the GitLab web page.

See: https://docs.gitlab.com/ee/ci/runners/new_creation_workflow.html
2024-07-17 17:41:02 +02:00
dba11ea88a flake.lock: Update
Flake lock file updates:

• Updated input 'agenix':
    'github:ryantm/agenix/1381a759b205dff7a6818733118d02253340fd5e' (2024-04-02)
  → 'github:ryantm/agenix/de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6' (2024-07-09)
• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/6143fc5eeb9c4f00163267708e26191d1e918932' (2024-04-21)
  → 'github:NixOS/nixpkgs/693bc46d169f5af9c992095736e82c3488bf7dbb' (2024-07-14)
2024-07-17 14:50:01 +02:00
17 changed files with 57 additions and 56 deletions

View File

@ -10,11 +10,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1712079060,
"narHash": "sha256-/JdiT9t+zzjChc5qQiF+jhrVhRt8figYH29rZO7pFe4=",
"lastModified": 1720546205,
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
"owner": "ryantm",
"repo": "agenix",
"rev": "1381a759b205dff7a6818733118d02253340fd5e",
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
"type": "github"
},
"original": {
@ -88,11 +88,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1713714899,
"narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=",
"lastModified": 1720957393,
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "6143fc5eeb9c4f00163267708e26191d1e918932",
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
"type": "github"
},
"original": {

View File

@ -3,6 +3,7 @@
# Includes the basic configuration for an Intel server.
imports = [
./base/agenix.nix
./base/august-shutdown.nix
./base/boot.nix
./base/env.nix
./base/fs.nix

View File

@ -0,0 +1,14 @@
{
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
# hardware from spurious electrical peaks on the yearly electrical cut for
# manteinance that starts on August 4th.
systemd.timers.august-shutdown = {
description = "Shutdown on August 2nd for maintenance";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = "*-08-02 11:00:00";
RandomizedDelaySec = "10min";
Unit = "systemd-poweroff.service";
};
};
}

View File

@ -6,6 +6,7 @@
../module/ceph.nix
../module/debuginfod.nix
../module/emulation.nix
../module/slurm-client.nix
./gitlab-runner.nix
./monitoring.nix
@ -19,8 +20,6 @@
#./pxe.nix
];
boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
# Select the this using the ID to avoid mismatches
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";

View File

@ -17,6 +17,7 @@
REGISTER_MANUAL_CONFIRM = true;
ENABLE_NOTIFY_MAIL = true;
};
log.LEVEL = "Warn";
mailer = {
ENABLED = true;

View File

@ -1,9 +1,8 @@
{ pkgs, lib, config, ... }:
{
age.secrets.ovniToken.file = ../../secrets/ovni-token.age;
age.secrets.gitlabToken.file = ../../secrets/gitlab-bsc-es-token.age;
age.secrets.nosvToken.file = ../../secrets/nosv-token.age;
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
services.gitlab-runner = {
enable = true;
@ -11,20 +10,14 @@
services = let
common-shell = {
executor = "shell";
tagList = [ "nix" "xeon" ];
registrationFlags = [
# Using space doesn't work, and causes it to misread the next flag
"--locked='false'"
];
environmentVariables = {
SHELL = "${pkgs.bash}/bin/bash";
};
};
common-docker = {
executor = "docker";
dockerImage = "debian:stable";
tagList = [ "docker" "xeon" ];
registrationFlags = [
"--locked='false'"
"--docker-network-mode host"
];
environmentVariables = {
@ -33,19 +26,12 @@
};
};
in {
# For gitlab.bsc.es
gitlab-bsc-es-shell = common-shell // {
registrationConfigFile = config.age.secrets.gitlabToken.path;
};
gitlab-bsc-es-docker = common-docker // {
registrationConfigFile = config.age.secrets.gitlabToken.path;
};
# For pm.bsc.es/gitlab
gitlab-pm-shell = common-shell // {
registrationConfigFile = config.age.secrets.ovniToken.path;
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
};
gitlab-pm-docker = common-docker // {
registrationConfigFile = config.age.secrets.ovniToken.path;
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
};
};
};

View File

@ -31,6 +31,7 @@
};
feature_toggles.publicDashboards = true;
"auth.anonymous".enabled = true;
log.level = "warn";
};
};

3
m/module/emulation.nix Normal file
View File

@ -0,0 +1,3 @@
{
boot.binfmt.emulatedSystems = [ "armv7l-linux" "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ];
}

View File

@ -47,8 +47,8 @@ in {
];
partitionName = [
"owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP"
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
];
# See slurm.conf(5) for more details about these options.
@ -83,6 +83,14 @@ in {
# Reduce port range so we can allow only this range in the firewall
SrunPortRange=60000-61000
# Use cores as consumable resources. In SLURM terms, a core may have
# multiple hardware threads (or CPUs).
SelectType=select/cons_tres
# Ignore memory constraints and only use unused cores to share a node with
# other jobs.
SelectTypeParameters=CR_CORE
'';
};

View File

@ -4,9 +4,10 @@
imports = [
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/slurm-hut-nix-store.nix
../module/debuginfod.nix
];
# Select the this using the ID to avoid mismatches

View File

@ -4,9 +4,10 @@
imports = [
../common/xeon.nix
../module/ceph.nix
../module/emulation.nix
../module/slurm-client.nix
../module/slurm-firewall.nix
../module/slurm-hut-nix-store.nix
../module/debuginfod.nix
];
# Select the this using the ID to avoid mismatches

View File

@ -1,11 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg caTbx0NBmsTSmZH4HtBaxhsauWqWUDTesJqT08UsoEQ
8ND31xuco+H8d5SKg8xsCFRPVDhU4d8UKwV1BnmKVjQ
-> ssh-ed25519 CAWG4Q 4ETYuhCwHHECkut4DWDknMMgpAvFqtzLWVC2Wi2L8FM
BGMvRnAfd8qZG5hzLefmk32FkGvwzE9pqBUyx4JY0co
-> ssh-ed25519 MSF3dg hj5QL4ZfylN8/W/MXQHvVqtI7mRvlQOYr8HsaQEmPB0
kvB7sljmmkswSGZDQnrwdTbTsN78EAwH3pz1pPe0Hu0
-> )Q-grease vHF} [8p1> @7z;C"/
tgSUKFyyrf2jLXZp+pakigwB2fRO/WFj2Qnt1aPjtVPEK92JbJ4
--- xzM0AhV4gTQE0Q7inJNo9vFj+crJQxWeI7u9pl7bqAI
á6nGJÖ0Bˆ7F° bßÙ½2®L³äÇ]²2zl<7A>À&e†KÄx®àé9SWNàV"MfŽ€ëÙKHUC:1b;9St‰ëõ±DuѧçÏ¢žÌŸ¡<02>èÐéîÀ<C380>ÔfÕ7¨î1§I(õdÓþô‡ï ó

View File

@ -0,0 +1,9 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ
jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY
-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU
8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg
-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA
SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8
--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18
ä·ù¼Â‡%å]yi"ô<>»LÓ âùH`ªa$Æþ)¦9ve<76>.0úmÉK<EFBFBD>vƒÀ ïu"|1cÞ-%ÔÕ"åWFï¡ÞA«<41>hº$•ºj<eñ¶xÅLx«ç.?œÈâ:L…¬ƒ,ëu»|³F|Õi²äÔ

Binary file not shown.

View File

@ -1,11 +0,0 @@
age-encryption.org/v1
-> ssh-ed25519 HY2yRg hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE
avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro
-> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA
FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70
-> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o
BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8
-> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t
oLG+0S1aGfO/ohCfgGmhDhwwLi4H
--- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI
¹õW©ÙÄd;ËÐC¾.¹¡_(“u G¡€‰#ìvâœgÉ<67>†õõy¹Y‰žl9ŒÈ¡Ïµ.Œé0x<30>Þ½úN. <>tB×b‡ü¼K¼ì:Q×—È\¹ÀÍT_´»Átxïm——_JñÞž-š

Binary file not shown.

View File

@ -6,10 +6,9 @@ let
safe = keys.hostGroup.safe ++ adminsKeys;
in
{
"gitlab-bsc-es-token.age".publicKeys = hut;
"gitea-runner-token.age".publicKeys = hut;
"ovni-token.age".publicKeys = hut;
"nosv-token.age".publicKeys = hut;
"gitlab-runner-docker-token.age".publicKeys = hut;
"gitlab-runner-shell-token.age".publicKeys = hut;
"nix-serve.age".publicKeys = hut;
"jungle-robot-password.age".publicKeys = hut;