Compare commits

..

4 Commits

Author SHA1 Message Date
5cfd7f0858 Fetch website from its own git repository
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-02 15:45:21 +02:00
9c5e22d62d Add script to trim the repository
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-02 15:44:56 +02:00
00fe0f46a1 Add acinca user
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-10-01 12:27:43 +02:00
79940876c3 Restart slurmd on failure
A failure to reach the control node can cause slurmd to fail and the
unit remains in the failed state until is manually restarted. Instead,
try to restart the service every 30 seconds, forever:

    owl1% systemctl show slurmd | grep -E 'Restart=|RestartUSec='
    Restart=on-failure
    RestartUSec=30s
    owl1% pgrep slurmd
    5903
    owl1% sudo kill -SEGV 5903
    owl1% pgrep slurmd
    6137

Fixes: rarias/jungle#177
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-09-30 17:20:39 +02:00
11 changed files with 75 additions and 70 deletions

46
doc/trim.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/sh
# Trims the jungle repository by moving the website to its own repository and
# removing it from jungle. It also removes big pdf files and kernel
# configurations so the jungle repository is small.
set -e
if [ -e oldjungle -o -e newjungle -o -e website ]; then
echo "remove oldjungle/, newjungle/ and website/ first"
exit 1
fi
# Clone the old jungle repo
git clone gitea@tent:rarias/jungle.git oldjungle
# First split the website into a new repository
mkdir website && git -C website init -b master
git-filter-repo \
--path web \
--subdirectory-filter web \
--source oldjungle \
--target website
# Then remove the website, pdf files and big kernel configs
mkdir newjungle && git -C newjungle init -b master
git-filter-repo \
--invert-paths \
--path web \
--path-glob 'doc*.pdf' \
--path-glob '**/kernel/configs/lockdep' \
--path-glob '**/kernel/configs/defconfig' \
--source oldjungle \
--target newjungle
set -x
du -sh oldjungle newjungle website
# 57M oldjungle
# 2,3M newjungle
# 6,4M website
du -sh --exclude=.git oldjungle newjungle website
# 30M oldjungle
# 700K newjungle
# 3,5M website

View File

@@ -11,13 +11,11 @@
./base/hw.nix
./base/net.nix
./base/nix.nix
./base/nosv.nix
./base/ntp.nix
./base/rev.nix
./base/ssh.nix
./base/users.nix
./base/watchdog.nix
./base/zsh.nix
./base/fish.nix
];
}

View File

@@ -5,8 +5,6 @@
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv
nix-output-monitor
nixfmt-rfc-style
# From bsckgs overlay
osumb
];

View File

@@ -1,4 +0,0 @@
{ ... }:
{
programs.fish.enable = true;
}

View File

@@ -1,9 +0,0 @@
{ ... }:
{
nix.settings.system-features = [ "nosv" ];
programs.nix-required-mounts.enable = true;
programs.nix-required-mounts.allowedPatterns.nosv.paths = [
"/sys/devices/system/cpu"
"/sys/devices/system/node"
];
}

View File

@@ -87,12 +87,6 @@
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
];
shell = pkgs.fish;
packages = with pkgs; [
starship
jujutsu
neovim
];
};
vlopez = {
@@ -162,7 +156,6 @@
};
csiringo = {
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
uid = 9653;
isNormalUser = true;
home = "/home/Computational/csiringo";
@@ -174,6 +167,19 @@
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
];
};
acinca = {
uid = 9654;
isNormalUser = true;
home = "/home/Computational/acinca";
description = "Arnau Cinca";
group = "Computational";
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
];
};
};
groups = {

View File

@@ -2,10 +2,13 @@
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = theFlake;
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
cd web
rm -rf public/
hugo
'';

View File

@@ -12,6 +12,12 @@
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
KillMode = lib.mkForce "control-group";
# If slurmd fails to contact the control server it will fail, causing the
# node to remain out of service until manually restarted. Always try to
# restart it.
Restart = "always";
RestartSec = "30s";
};
services.slurm.client.enable = true;

View File

@@ -2,10 +2,13 @@
let
website = pkgs.stdenv.mkDerivation {
name = "jungle-web";
src = theFlake;
src = pkgs.fetchgit {
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
};
buildInputs = [ pkgs.hugo ];
buildPhase = ''
cd web
rm -rf public/
hugo
'';

View File

@@ -4,7 +4,6 @@
imports = [
../common/ssf.nix
../module/hut-substituter.nix
./virtualization.nix
];
# Select this using the ID to avoid mismatches
@@ -31,5 +30,4 @@
prefixLength = 24;
} ];
};
}

View File

@@ -1,40 +0,0 @@
{
lib,
pkgs,
config,
...
}:
{
# Enable common container config files in /etc/containers
virtualisation.containers.enable = true;
virtualisation = {
podman = {
enable = true;
# Required for containers under podman-compose to be able to talk to each other.
defaultNetwork.settings.dns_enabled = true;
};
};
# We cannot use /home since nfs does not support fileattrs needed by podman
systemd.tmpfiles.settings = {
"podman-users" = lib.mapAttrs' (
name: value:
lib.nameValuePair ("/var/lib/podman-users/" + name) {
d = {
group = value.group;
mode = value.homeMode;
user = name;
};
}
) (lib.filterAttrs (_: x: x.isNormalUser) config.users.users);
};
# Useful other development tools
environment.systemPackages = with pkgs; [
dive # look into docker image layers
podman-tui # status of containers in the terminal
podman-compose # start group of containers for dev
];
}