Compare commits

...

12 Commits

Author SHA1 Message Date
c4a63b8ffd weasel: use tent cache 2025-09-30 15:38:54 +02:00
47da32d2cb Add nixfmt-rfc-style to common packages 2025-09-30 15:38:53 +02:00
a0a425b013 Add packages to user abonerib 2025-09-30 15:38:53 +02:00
6db0d7e1ef Add nix-output-monitor to default packages 2025-09-30 15:38:52 +02:00
3275646804 Set fish shell for user abonerib 2025-09-30 15:38:52 +02:00
b29403db13 weasel: create user folders in /var/lib/podman-users
/home is a nfs mount, which does not support extra filesystem arguments
needed to run podman. We need to have a local home.
2025-09-30 15:38:51 +02:00
8e2d703492 weasel: add podman 2025-09-30 15:38:51 +02:00
db6a3faa44 Enable nosv system feature 2025-09-30 15:37:14 +02:00
163d19bd05 Lower connect timeout when using hut substituter
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-09-29 18:44:48 +02:00
360f67cfab Use hut substituter in all nodes
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-09-29 18:44:38 +02:00
a402bc880c Remove machine access for user csiringo
Reviewed-by: Rodrigo Arias Mallo <rodrigo.arias@bsc.es>
2025-09-29 18:23:24 +02:00
c441178910 Add web post update for 2025
Reviewed-by: Aleix Boné <abonerib@bsc.es>
2025-09-29 18:02:21 +02:00
17 changed files with 126 additions and 18 deletions

View File

@@ -5,6 +5,7 @@
../common/xeon.nix
../common/ssf/hosts.nix
../module/ceph.nix
../module/hut-substituter.nix
../module/slurm-server.nix
./nfs.nix
./wireguard.nix
@@ -65,10 +66,4 @@
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
'';
};
# Use tent for cache
nix.settings = {
extra-substituters = [ "https://jungle.bsc.es/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
}

View File

@@ -3,6 +3,7 @@
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
../module/monitoring.nix
];

View File

@@ -11,11 +11,13 @@
./base/hw.nix
./base/net.nix
./base/nix.nix
./base/nosv.nix
./base/ntp.nix
./base/rev.nix
./base/ssh.nix
./base/users.nix
./base/watchdog.nix
./base/zsh.nix
./base/fish.nix
];
}

View File

@@ -5,6 +5,8 @@
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
ncdu config.boot.kernelPackages.perf ldns pv
nix-output-monitor
nixfmt-rfc-style
# From bsckgs overlay
osumb
];

4
m/common/base/fish.nix Normal file
View File

@@ -0,0 +1,4 @@
# Enable the fish shell through the NixOS `programs.fish` module.
{ ... }:
{
  programs.fish = {
    enable = true;
  };
}

9
m/common/base/nosv.nix Normal file
View File

@@ -0,0 +1,9 @@
# Support for builds that request the "nosv" system feature.
{ ... }:
{
  # Declare "nosv" among the system features in this machine's Nix settings.
  # NOTE(review): presumably this definition replaces the option's built-in
  # default list rather than extending it; confirm the defaults are not needed
  # or are defined elsewhere.
  nix.settings.system-features = [ "nosv" ];

  programs.nix-required-mounts = {
    enable = true;

    # Host paths registered under the "nosv" pattern.
    allowedPatterns.nosv.paths = [
      "/sys/devices/system/cpu"
      "/sys/devices/system/node"
    ];
  };
}

View File

@@ -87,6 +87,12 @@
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
];
shell = pkgs.fish;
packages = with pkgs; [
starship
jujutsu
neovim
];
};
vlopez = {
@@ -162,7 +168,7 @@
home = "/home/Computational/csiringo";
description = "Cesare Siringo";
group = "Computational";
hosts = [ "apex" "weasel" ];
hosts = [ ];
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"

View File

@@ -9,6 +9,7 @@
./cpufreq.nix
./fs.nix
./users.nix
../module/hut-substituter.nix
../module/debuginfod.nix
];

View File

@@ -8,6 +8,7 @@
../module/emulation.nix
../module/nvidia.nix
../module/slurm-client.nix
../module/hut-substituter.nix
./wireguard.nix
];
@@ -62,12 +63,6 @@
interfaces.enp1s0f0np0.useDHCP = true;
};
# Use hut for cache
nix.settings = {
extra-substituters = [ "https://jungle.bsc.es/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
# Recommended for new graphics cards
hardware.nvidia.open = true;

View File

@@ -4,6 +4,7 @@
imports = [
../common/ssf.nix
../module/monitoring.nix
../module/hut-substituter.nix
];
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";

View File

@@ -6,5 +6,8 @@
{
extra-substituters = [ "http://hut/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
# Set a low timeout in case hut is down
connect-timeout = 3; # seconds
};
}

View File

@@ -9,6 +9,7 @@
../module/nvidia.nix
../eudy/kernel/perf.nix
./wireguard.nix
../module/hut-substituter.nix
];
# Don't install Grub on the disk yet
@@ -51,11 +52,6 @@
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
};
nix.settings = {
extra-substituters = [ "https://jungle.bsc.es/cache" ];
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
};
# Enable performance governor
powerManagement.cpuFreqGovernor = "performance";

View File

@@ -15,6 +15,7 @@
../hut/msmtp.nix
../module/p.nix
../module/vpn-dac.nix
../module/hut-substituter.nix
];
# Select this using the ID to avoid mismatches

View File

@@ -3,6 +3,8 @@
{
imports = [
../common/ssf.nix
../module/hut-substituter.nix
./virtualization.nix
];
# Select this using the ID to avoid mismatches
@@ -29,4 +31,5 @@
prefixLength = 24;
} ];
};
}

View File

@@ -0,0 +1,40 @@
# Podman container support, plus one directory per normal user on local
# storage under /var/lib/podman-users.
{
  lib,
  pkgs,
  config,
  ...
}:
let
  # Only accounts declared with `isNormalUser` get a directory.
  normalUsers = lib.filterAttrs (_: user: user.isNormalUser) config.users.users;

  # Build the tmpfiles "d" rule for one user: the directory is owned by the
  # user and reuses the group and mode configured for that account.
  mkUserDir =
    name: user:
    lib.nameValuePair "/var/lib/podman-users/${name}" {
      d = {
        user = name;
        group = user.group;
        mode = user.homeMode;
      };
    };
in
{
  virtualisation = {
    # Enable common container config files in /etc/containers
    containers.enable = true;

    podman = {
      enable = true;
      # Required for containers under podman-compose to be able to talk to each other.
      defaultNetwork.settings.dns_enabled = true;
    };
  };

  # /home is an NFS mount, which lacks filesystem features that podman needs,
  # so every normal user gets a directory on local storage instead.
  systemd.tmpfiles.settings."podman-users" = lib.mapAttrs' mkUserDir normalUsers;

  # Useful other development tools
  environment.systemPackages = [
    pkgs.dive # look into docker image layers
    pkgs.podman-tui # status of containers in the terminal
    pkgs.podman-compose # start group of containers for dev
  ];
}

View File

@@ -0,0 +1,49 @@
---
title: "Update 2025-09-26"
author: "Rodrigo Arias Mallo"
date: 2025-09-26
---
This is a summary of notable changes introduced in the last two years. We
continue to keep all machines updated to the latest NixOS release (currently
NixOS 25.05).
### New compute node: fox
We have a new [fox machine](/fox), with two AMD Genoa 9684X CPUs and two NVIDIA
RTX4000 GPUs. Over the last few months we have been running some tests and it seems
that most of the components work well. We have configured CUDA to use the NVIDIA
GPUs, as well as AMD uProf to trace performance and energy counters from the
CPUs.
### Upgraded login node: apex
We have upgraded the operating system on the login node to NixOS, which now runs
Linux 6.15.6. During the upgrade, we have detected a problem with the storage
disks. The `/` and `/home` partitions sit on a
[RAID 5](https://en.wikipedia.org/wiki/Standard_RAID_levels#RAID_5),
transparently handled by a RAID hardware controller which starts its own
firmware before passing control to the BIOS to continue the boot sequence. A
problem during the startup of the firmware prevented the node from even reaching
the BIOS screen.
After a long debugging session, we detected that the flash memory that stores
the firmware of the hardware controller was likely to be the issue, since
[memory cells](https://en.wikipedia.org/wiki/Flash_memory#Principles_of_operation)
may lose charge over time and can end up corrupting the content. We flashed
the latest firmware so the memory cells are charged again with the new bits and
that fixed the problem. Hopefully we will be able to use it for some more years.
The SLURM server has been moved to apex which allows users to also submit jobs
to fox.
### Migrated machines to BSC building
The server room had a temperature issue that had been affecting our machines
since the end of February of 2025. As the summer approached, the temperature
exceeded the safe limits for our hardware, so we had to shut down the cluster.
![Room temperature](temp.png)
Since then, we have moved the cluster to BSC premises, where it now rests at a
stable temperature, so hopefully we won't have any more unscheduled downtime.

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB