forked from rarias/jungle
Compare commits
39 Commits
share-file
...
master
Author | SHA1 | Date | |
---|---|---|---|
edf744db8d | |||
b82894eaec | |||
1c47199891 | |||
8738bd4eeb | |||
7699783aac | |||
fee1d4da7e | |||
b77ce7fb56 | |||
b4a12625c5 | |||
302106ea9a | |||
96877de8d9 | |||
8878985be6 | |||
737578db34 | |||
88555e3f8c | |||
feb2060be7 | |||
00999434c2 | |||
29d58cc62d | |||
587caf262e | |||
2730404ca5 | |||
84db5e6fd6 | |||
f4f34a3159 | |||
91b8b4a3c5 | |||
6cad205269 | |||
c57bf76969 | |||
ad4b615211 | |||
b4518b59cf | |||
45dc4124a3 | |||
bdfe9a48fd | |||
1b337d31f8 | |||
717cd5a21e | |||
def5955614 | |||
0e3c975cb5 | |||
93189a575e | |||
36592c44eb | |||
a34e3752a2 | |||
0d2dea94fb | |||
7f539d7e06 | |||
f8ec090836 | |||
9a9161fc55 | |||
1a0cf96fc4 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
*.swp
|
||||
/result
|
||||
/misc
|
||||
|
22
flake.lock
generated
22
flake.lock
generated
@ -10,11 +10,11 @@
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1720546205,
|
||||
"narHash": "sha256-boCXsjYVxDviyzoEyAk624600f3ZBo/DKtUdvMTpbGY=",
|
||||
"lastModified": 1723293904,
|
||||
"narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=",
|
||||
"owner": "ryantm",
|
||||
"repo": "agenix",
|
||||
"rev": "de96bd907d5fbc3b14fc33ad37d1b9a3cb15edc6",
|
||||
"rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -30,11 +30,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1713974364,
|
||||
"narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=",
|
||||
"lastModified": 1732868163,
|
||||
"narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "de89197a4a7b162db7df9d41c9d07759d87c5709",
|
||||
"revCount": 937,
|
||||
"rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f",
|
||||
"revCount": 952,
|
||||
"type": "git",
|
||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||
},
|
||||
@ -88,16 +88,16 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1720957393,
|
||||
"narHash": "sha256-oedh2RwpjEa+TNxhg5Je9Ch6d3W1NKi7DbRO1ziHemA=",
|
||||
"lastModified": 1736867362,
|
||||
"narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "693bc46d169f5af9c992095736e82c3488bf7dbb",
|
||||
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"ref": "nixos-24.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
|
||||
agenix.url = "github:ryantm/agenix";
|
||||
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
||||
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
||||
@ -25,6 +25,7 @@ in
|
||||
bay = mkConf "bay";
|
||||
lake2 = mkConf "lake2";
|
||||
raccoon = mkConf "raccoon";
|
||||
fox = mkConf "fox";
|
||||
};
|
||||
|
||||
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
|
||||
|
3
keys.nix
3
keys.nix
@ -9,10 +9,11 @@ rec {
|
||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox";
|
||||
};
|
||||
|
||||
hostGroup = with hosts; rec {
|
||||
compute = [ owl1 owl2 ];
|
||||
compute = [ owl1 owl2 fox ];
|
||||
playground = [ eudy koro ];
|
||||
storage = [ bay lake2 ];
|
||||
monitor = [ hut ];
|
||||
|
@ -32,6 +32,21 @@
|
||||
};
|
||||
};
|
||||
|
||||
# The nix-gc.service can begin its execution *before* /home is mounted,
|
||||
# causing it to remove all gcroots considering them as stale, as it cannot
|
||||
# access the symlink. To prevent this problem, we force the service to wait
|
||||
# until /home is mounted as well as other remote FS like /ceph.
|
||||
systemd.services.nix-gc = {
|
||||
# Start remote-fs.target if not already being started and fail if it fails
|
||||
# to start. It will also be stopped if the remote-fs.target fails after
|
||||
# starting successfully.
|
||||
bindsTo = [ "remote-fs.target" ];
|
||||
# Wait until remote-fs.target fully starts before starting this one.
|
||||
after = [ "remote-fs.target"];
|
||||
# Ensure we can access a remote path inside /home
|
||||
unitConfig.ConditionPathExists = "/home/Computational";
|
||||
};
|
||||
|
||||
# This value determines the NixOS release from which the default
|
||||
# settings for stateful data, like file locations and database versions
|
||||
# on your system were taken. It‘s perfectly fine and recommended to leave
|
||||
|
@ -13,10 +13,16 @@ in
|
||||
Host bscpm02.bsc.es bscpm03.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
|
||||
User git
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
|
||||
# Connect to BSC machines via hut proxy too
|
||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
'';
|
||||
|
||||
programs.ssh.knownHosts = hostsKeys // {
|
||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||
"glogin1.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
"glogin2.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFsHsZGCrzpd4QDVn5xoDOtrNBkb0ylxKGlyBt6l9qCz";
|
||||
};
|
||||
}
|
||||
|
@ -68,7 +68,7 @@
|
||||
home = "/home/Computational/anavarro";
|
||||
description = "Antoni Navarro";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "raccoon" ];
|
||||
hosts = [ "hut" "raccoon" "fox" ];
|
||||
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
||||
@ -81,7 +81,7 @@
|
||||
home = "/home/Computational/abonerib";
|
||||
description = "Aleix Boné";
|
||||
group = "Computational";
|
||||
hosts = [ "owl1" "owl2" "hut" "raccoon" ];
|
||||
hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ];
|
||||
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
||||
@ -100,6 +100,45 @@
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
||||
];
|
||||
};
|
||||
|
||||
dbautist = {
|
||||
uid = 5649;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dbautist";
|
||||
description = "Dylan Bautista Cases";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" ];
|
||||
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
||||
];
|
||||
};
|
||||
|
||||
dalvare1 = {
|
||||
uid = 2758;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/dalvare1";
|
||||
description = "David Álvarez";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "fox" ];
|
||||
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
|
||||
];
|
||||
};
|
||||
|
||||
varcila = {
|
||||
uid = 5650;
|
||||
isNormalUser = true;
|
||||
home = "/home/Computational/varcila";
|
||||
description = "Vincent Arcila";
|
||||
group = "Computational";
|
||||
hosts = [ "hut" "fox" ];
|
||||
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
|
||||
openssh.authorizedKeys.keys = [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
groups = {
|
||||
|
@ -34,37 +34,37 @@
|
||||
# Node Entry for node: mds01 (ID=72)
|
||||
10.0.40.40 bay mds01 mds01-eth0
|
||||
10.0.42.40 bay-ib mds01-ib0
|
||||
10.0.40.141 bay-ipmi mds01-ipmi0
|
||||
10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi
|
||||
|
||||
# Node Entry for node: oss01 (ID=73)
|
||||
10.0.40.41 oss01 oss01-eth0
|
||||
10.0.42.41 oss01-ib0
|
||||
10.0.40.142 oss01-ipmi0
|
||||
10.0.40.142 oss01-ipmi0 oss01-ipmi
|
||||
|
||||
# Node Entry for node: oss02 (ID=74)
|
||||
10.0.40.42 lake2 oss02 oss02-eth0
|
||||
10.0.42.42 lake2-ib oss02-ib0
|
||||
10.0.40.143 lake2-ipmi oss02-ipmi0
|
||||
10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi
|
||||
|
||||
# Node Entry for node: xeon01 (ID=15)
|
||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
||||
10.0.42.1 owl1-ib xeon01-ib0
|
||||
10.0.40.101 owl1-ipmi xeon01-ipmi0
|
||||
10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi
|
||||
|
||||
# Node Entry for node: xeon02 (ID=16)
|
||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
||||
10.0.42.2 owl2-ib xeon02-ib0
|
||||
10.0.40.102 owl2-ipmi xeon02-ipmi0
|
||||
10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi
|
||||
|
||||
# Node Entry for node: xeon03 (ID=17)
|
||||
10.0.40.3 xeon03 xeon03-eth0
|
||||
10.0.42.3 xeon03-ib0
|
||||
10.0.40.103 xeon03-ipmi0
|
||||
10.0.40.103 xeon03-ipmi0 xeon03-ipmi
|
||||
|
||||
# Node Entry for node: xeon04 (ID=18)
|
||||
10.0.40.4 xeon04 xeon04-eth0
|
||||
10.0.42.4 xeon04-ib0
|
||||
10.0.40.104 xeon04-ipmi0
|
||||
10.0.40.104 xeon04-ipmi0 xeon04-ipmi
|
||||
|
||||
# Node Entry for node: xeon05 (ID=19)
|
||||
10.0.40.5 koro xeon05 xeon05-eth0
|
||||
@ -74,17 +74,21 @@
|
||||
# Node Entry for node: xeon06 (ID=20)
|
||||
10.0.40.6 xeon06 xeon06-eth0
|
||||
10.0.42.6 xeon06-ib0
|
||||
10.0.40.106 xeon06-ipmi0
|
||||
10.0.40.106 xeon06-ipmi0 xeon06-ipmi
|
||||
|
||||
# Node Entry for node: xeon07 (ID=21)
|
||||
10.0.40.7 hut xeon07 xeon07-eth0
|
||||
10.0.42.7 hut-ib xeon07-ib0
|
||||
10.0.40.107 hut-ipmi xeon07-ipmi0
|
||||
10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi
|
||||
|
||||
# Node Entry for node: xeon08 (ID=22)
|
||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
||||
10.0.42.8 eudy-ib xeon08-ib0
|
||||
10.0.40.108 eudy-ipmi xeon08-ipmi0
|
||||
10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi
|
||||
|
||||
# fox
|
||||
10.0.40.26 fox
|
||||
10.0.40.126 fox-ipmi
|
||||
'';
|
||||
};
|
||||
}
|
||||
|
75
m/fox/configuration.nix
Normal file
75
m/fox/configuration.nix
Normal file
@ -0,0 +1,75 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
../common/xeon.nix
|
||||
../module/ceph.nix
|
||||
../module/emulation.nix
|
||||
../module/slurm-client.nix
|
||||
../module/slurm-firewall.nix
|
||||
];
|
||||
|
||||
# Select the this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
|
||||
|
||||
# No swap, there is plenty of RAM
|
||||
swapDevices = lib.mkForce [];
|
||||
|
||||
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
|
||||
boot.kernelModules = [ "kvm-amd" ];
|
||||
|
||||
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
|
||||
|
||||
networking = {
|
||||
hostName = "fox";
|
||||
interfaces.enp1s0f0np0.ipv4.addresses = [ {
|
||||
address = "10.0.40.26";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
|
||||
# Configure Nvidia driver to use with CUDA
|
||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||
hardware.graphics.enable = true;
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
nixpkgs.config.nvidia.acceptLicense = true;
|
||||
services.xserver.videoDrivers = [ "nvidia" ];
|
||||
|
||||
# Mount NVME disks
|
||||
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
|
||||
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
|
||||
|
||||
# Make a /nvme{0,1}/$USER directory for each user.
|
||||
systemd.services.create-nvme-dirs = let
|
||||
# Take only normal users in fox
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "local-fs.target" ];
|
||||
after = [ "local-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
|
||||
# Only allow SSH connections from users who have a SLURM allocation
|
||||
# See: https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
security.pam.services.sshd.rules.account.slurm = {
|
||||
control = "required";
|
||||
enable = true;
|
||||
modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so";
|
||||
args = [ "log_level=debug5" ];
|
||||
order = 999999; # Make it last one
|
||||
};
|
||||
|
||||
# Disable systemd session (pam_systemd.so) as it will conflict with the
|
||||
# pam_slurm_adopt.so module. What happens is that the shell is first adopted
|
||||
# into the slurmstepd task and then into the systemd session, which is not
|
||||
# what we want, otherwise it will linger even if all jobs are gone.
|
||||
security.pam.services.sshd.startSession = lib.mkForce false;
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
{ config, pkgs, ... }:
|
||||
{ config, pkgs, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
@ -17,11 +17,28 @@
|
||||
./gitea.nix
|
||||
./msmtp.nix
|
||||
./postgresql.nix
|
||||
./nginx.nix
|
||||
./p.nix
|
||||
#./pxe.nix
|
||||
];
|
||||
|
||||
# Select the this using the ID to avoid mismatches
|
||||
boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN";
|
||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53567f";
|
||||
|
||||
fileSystems = {
|
||||
"/" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nvme";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
options = [ "noatime" ];
|
||||
};
|
||||
|
||||
"/boot" = lib.mkForce {
|
||||
device = "/dev/disk/by-label/nixos-boot";
|
||||
fsType = "ext4";
|
||||
neededForBoot = true;
|
||||
};
|
||||
};
|
||||
|
||||
networking = {
|
||||
hostName = "hut";
|
||||
|
@ -1,8 +1,9 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
|
||||
{
|
||||
age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age;
|
||||
age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age;
|
||||
age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age;
|
||||
age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age;
|
||||
age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age;
|
||||
|
||||
services.gitlab-runner = {
|
||||
enable = true;
|
||||
@ -21,21 +22,89 @@
|
||||
"--docker-network-mode host"
|
||||
];
|
||||
environmentVariables = {
|
||||
https_proxy = "http://localhost:23080";
|
||||
http_proxy = "http://localhost:23080";
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
};
|
||||
};
|
||||
in {
|
||||
# For pm.bsc.es/gitlab
|
||||
gitlab-pm-shell = common-shell // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path;
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path;
|
||||
};
|
||||
gitlab-pm-docker = common-docker // {
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path;
|
||||
authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path;
|
||||
};
|
||||
|
||||
gitlab-bsc-docker = {
|
||||
# gitlab.bsc.es still uses the old token mechanism
|
||||
registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path;
|
||||
tagList = [ "docker" "hut" ];
|
||||
environmentVariables = {
|
||||
# We cannot access the hut local interface from docker, so we connect
|
||||
# to hut directly via the ethernet one.
|
||||
https_proxy = "http://hut:23080";
|
||||
http_proxy = "http://hut:23080";
|
||||
};
|
||||
executor = "docker";
|
||||
dockerImage = "alpine";
|
||||
dockerVolumes = [
|
||||
"/nix/store:/nix/store:ro"
|
||||
"/nix/var/nix/db:/nix/var/nix/db:ro"
|
||||
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
|
||||
];
|
||||
dockerExtraHosts = [
|
||||
# Required to pass the proxy via hut
|
||||
"hut:10.0.40.7"
|
||||
];
|
||||
dockerDisableCache = true;
|
||||
registrationFlags = [
|
||||
# Increase build log length to 64 MiB
|
||||
"--output-limit 65536"
|
||||
];
|
||||
preBuildScript = pkgs.writeScript "setup-container" ''
|
||||
mkdir -p -m 0755 /nix/var/log/nix/drvs
|
||||
mkdir -p -m 0755 /nix/var/nix/gcroots
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles
|
||||
mkdir -p -m 0755 /nix/var/nix/temproots
|
||||
mkdir -p -m 0755 /nix/var/nix/userpool
|
||||
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
|
||||
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
|
||||
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
|
||||
mkdir -p -m 0700 "$HOME/.nix-defexpr"
|
||||
mkdir -p -m 0700 "$HOME/.ssh"
|
||||
cat > "$HOME/.ssh/config" << EOF
|
||||
Host bscpm04.bsc.es gitlab-internal.bsc.es
|
||||
User git
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
||||
EOF
|
||||
cat >> "$HOME/.ssh/known_hosts" << EOF
|
||||
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
|
||||
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
|
||||
EOF
|
||||
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
|
||||
# Required to load SSL certificate paths
|
||||
. ${pkgs.cacert}/nix-support/setup-hook
|
||||
'';
|
||||
environmentVariables = {
|
||||
ENV = "/etc/profile";
|
||||
USER = "root";
|
||||
NIX_REMOTE = "daemon";
|
||||
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# DOCKER* chains are useless, override at FORWARD
|
||||
networking.firewall.extraCommands = ''
|
||||
# Allow docker to use our proxy
|
||||
iptables -I FORWARD 1 -p tcp -i docker0 -d hut --dport 23080 -j nixos-fw-accept
|
||||
# Block anything else coming from docker
|
||||
iptables -I FORWARD 2 -p all -i docker0 -j nixos-fw-log-refuse
|
||||
'';
|
||||
|
||||
#systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash";
|
||||
systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false;
|
||||
systemd.services.gitlab-runner.serviceConfig.User = "gitlab-runner";
|
||||
|
31
m/hut/gpfs-probe.nix
Normal file
31
m/hut/gpfs-probe.nix
Normal file
@ -0,0 +1,31 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
let
|
||||
gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { }
|
||||
''
|
||||
cp ${./gpfs-probe.sh} $out;
|
||||
chmod +x $out
|
||||
''
|
||||
;
|
||||
in
|
||||
{
|
||||
# Use a new user to handle the SSH keys
|
||||
users.groups.ssh-robot = { };
|
||||
users.users.ssh-robot = {
|
||||
description = "SSH Robot";
|
||||
isNormalUser = true;
|
||||
home = "/var/lib/ssh-robot";
|
||||
};
|
||||
|
||||
systemd.services.gpfs-probe = {
|
||||
description = "Daemon to report GPFS latency via SSH";
|
||||
path = [ pkgs.openssh pkgs.netcat ];
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "default.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}";
|
||||
User = "ssh-robot";
|
||||
Group = "ssh-robot";
|
||||
};
|
||||
};
|
||||
}
|
18
m/hut/gpfs-probe.sh
Executable file
18
m/hut/gpfs-probe.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/sh
|
||||
|
||||
N=500
|
||||
|
||||
t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}")
|
||||
|
||||
if [ -z "$t" ]; then
|
||||
t="5.00"
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||
|
||||
# HELP gpfs_touch_latency Time to create $N files.
|
||||
# TYPE gpfs_touch_latency gauge
|
||||
gpfs_touch_latency $t
|
||||
EOF
|
@ -1,13 +0,0 @@
|
||||
modules:
|
||||
default:
|
||||
collectors:
|
||||
- bmc
|
||||
- ipmi
|
||||
- chassis
|
||||
|
||||
lan:
|
||||
collectors:
|
||||
- ipmi
|
||||
- chassis
|
||||
user: ""
|
||||
pass: ""
|
@ -1,7 +1,10 @@
|
||||
{ config, lib, ... }:
|
||||
|
||||
{
|
||||
imports = [ ../module/slurm-exporter.nix ];
|
||||
imports = [
|
||||
../module/slurm-exporter.nix
|
||||
./gpfs-probe.nix
|
||||
];
|
||||
|
||||
age.secrets.grafanaJungleRobotPassword = {
|
||||
file = ../../secrets/jungle-robot-password.age;
|
||||
@ -9,6 +12,8 @@
|
||||
mode = "400";
|
||||
};
|
||||
|
||||
age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings = {
|
||||
@ -70,13 +75,13 @@
|
||||
enable = true;
|
||||
group = "root";
|
||||
user = "root";
|
||||
configFile = ./ipmi.yml;
|
||||
#extraFlags = [ "--log.level=debug" ];
|
||||
configFile = config.age.secrets.ipmiYml.path;
|
||||
# extraFlags = [ "--log.level=debug" ];
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
node = {
|
||||
enable = true;
|
||||
enabledCollectors = [ "systemd" ];
|
||||
enabledCollectors = [ "systemd" "logind" ];
|
||||
port = 9002;
|
||||
listenAddress = "127.0.0.1";
|
||||
};
|
||||
@ -102,6 +107,7 @@
|
||||
"127.0.0.1:9252"
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
||||
"127.0.0.1:9341" # Slurm exporter
|
||||
"127.0.0.1:9966" # GPFS custom exporter
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
||||
];
|
||||
}];
|
||||
@ -202,7 +208,7 @@
|
||||
# Sets the "instance" label with the remote host we are querying
|
||||
source_labels = [ "__param_target" ];
|
||||
separator = ";";
|
||||
regex = "(.*)";
|
||||
regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end
|
||||
target_label = "instance";
|
||||
replacement = "\${1}";
|
||||
action = "replace";
|
||||
@ -244,6 +250,17 @@
|
||||
module = [ "raccoon" ];
|
||||
};
|
||||
}
|
||||
{
|
||||
job_name = "ipmi-fox";
|
||||
metrics_path = "/ipmi";
|
||||
static_configs = [
|
||||
{ targets = [ "127.0.0.1:9290" ]; }
|
||||
];
|
||||
params = {
|
||||
target = [ "fox-ipmi" ];
|
||||
module = [ "fox" ];
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
|
72
m/hut/nginx.nix
Normal file
72
m/hut/nginx.nix
Normal file
@ -0,0 +1,72 @@
|
||||
{ theFlake, pkgs, ... }:
|
||||
let
|
||||
website = pkgs.stdenv.mkDerivation {
|
||||
name = "jungle-web";
|
||||
src = theFlake;
|
||||
buildInputs = [ pkgs.hugo ];
|
||||
buildPhase = ''
|
||||
cd web
|
||||
rm -rf public/
|
||||
hugo
|
||||
'';
|
||||
installPhase = ''
|
||||
cp -r public $out
|
||||
'';
|
||||
# Don't mess doc/
|
||||
dontFixup = true;
|
||||
};
|
||||
in
|
||||
{
|
||||
services.nginx = {
|
||||
enable = true;
|
||||
virtualHosts."jungle.bsc.es" = {
|
||||
root = "${website}";
|
||||
listen = [
|
||||
{
|
||||
addr = "127.0.0.1";
|
||||
port = 80;
|
||||
}
|
||||
];
|
||||
extraConfig = ''
|
||||
set_real_ip_from 127.0.0.1;
|
||||
set_real_ip_from 84.88.52.107;
|
||||
real_ip_recursive on;
|
||||
real_ip_header X-Forwarded-For;
|
||||
|
||||
location /git {
|
||||
rewrite ^/git$ / break;
|
||||
rewrite ^/git/(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:3000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /cache {
|
||||
rewrite ^/cache(.*) /$1 break;
|
||||
proxy_pass http://127.0.0.1:5000;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /lists {
|
||||
proxy_pass http://127.0.0.1:8081;
|
||||
proxy_redirect http:// $scheme://;
|
||||
}
|
||||
location /grafana {
|
||||
proxy_pass http://127.0.0.1:2342;
|
||||
proxy_redirect http:// $scheme://;
|
||||
proxy_set_header Host $host;
|
||||
# Websockets
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
location ~ ^/~(.+?)(/.*)?$ {
|
||||
alias /ceph/home/$1/public_html$2;
|
||||
index index.html index.htm;
|
||||
autoindex on;
|
||||
absolute_redirect off;
|
||||
}
|
||||
location /p/ {
|
||||
alias /ceph/p/;
|
||||
}
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
43
m/hut/p.nix
Normal file
43
m/hut/p.nix
Normal file
@ -0,0 +1,43 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
let
|
||||
p = pkgs.writeShellScriptBin "p" ''
|
||||
set -e
|
||||
cd /ceph
|
||||
pastedir="p/$USER"
|
||||
mkdir -p "$pastedir"
|
||||
|
||||
ext="txt"
|
||||
|
||||
if [ -n "$1" ]; then
|
||||
ext="$1"
|
||||
fi
|
||||
|
||||
out=$(mktemp "$pastedir/XXXXXXXX.$ext")
|
||||
|
||||
cat > "$out"
|
||||
chmod go+r "$out"
|
||||
echo "https://jungle.bsc.es/$out"
|
||||
'';
|
||||
in
|
||||
{
|
||||
environment.systemPackages = with pkgs; [ p ];
|
||||
|
||||
# Make sure we have a directory per user. We cannot use the nice
|
||||
# systemd-tmpfiles-setup.service service because this is a remote FS, and it
|
||||
# may not be mounted when it runs.
|
||||
systemd.services.create-paste-dirs = let
|
||||
# Take only normal users in hut
|
||||
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||
commands = lib.concatLists (lib.mapAttrsToList
|
||||
(_: user: [
|
||||
"install -d -o ${user.name} -g ${user.group} -m 0755 /ceph/p/${user.name}"
|
||||
]) users);
|
||||
script = pkgs.writeShellScript "create-paste-dirs.sh" (lib.concatLines commands);
|
||||
in {
|
||||
enable = true;
|
||||
wants = [ "remote-fs.target" ];
|
||||
after = [ "remote-fs.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig.ExecStart = script;
|
||||
};
|
||||
}
|
@ -1,15 +1,15 @@
|
||||
- targets:
|
||||
- 10.0.40.101
|
||||
- 10.0.40.102
|
||||
- 10.0.40.103
|
||||
- 10.0.40.104
|
||||
- 10.0.40.105
|
||||
- 10.0.40.106
|
||||
- 10.0.40.107
|
||||
- 10.0.40.108
|
||||
- owl1-ipmi
|
||||
- owl2-ipmi
|
||||
- xeon03-ipmi
|
||||
- xeon04-ipmi
|
||||
- koro-ipmi
|
||||
- xeon06-ipmi
|
||||
- hut-ipmi
|
||||
- eudy-ipmi
|
||||
# Storage
|
||||
- 10.0.40.141
|
||||
- 10.0.40.142
|
||||
- 10.0.40.143
|
||||
- bay-ipmi
|
||||
- oss01-ipmi
|
||||
- lake2-ipmi
|
||||
labels:
|
||||
job: ipmi-lan
|
||||
|
@ -43,12 +43,13 @@ in {
|
||||
clusterName = "jungle";
|
||||
nodeName = [
|
||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||
"fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox"
|
||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
||||
];
|
||||
|
||||
partitionName = [
|
||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
"all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||
];
|
||||
|
||||
# See slurm.conf(5) for more details about these options.
|
||||
@ -76,7 +77,7 @@ in {
|
||||
SuspendTimeout=60
|
||||
ResumeProgram=${resumeProgram}
|
||||
ResumeTimeout=300
|
||||
SuspendExcNodes=hut
|
||||
SuspendExcNodes=hut,fox
|
||||
|
||||
# Turn the nodes off after 1 hour of inactivity
|
||||
SuspendTime=3600
|
||||
@ -91,9 +92,29 @@ in {
|
||||
# Ignore memory constraints and only use unused cores to share a node with
|
||||
# other jobs.
|
||||
SelectTypeParameters=CR_Core
|
||||
|
||||
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||
# This sets up the "extern" step into which ssh-launched processes will be
|
||||
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
|
||||
# when a task runs (srun) so we can ssh early.
|
||||
PrologFlags=Alloc,Contain,X11
|
||||
|
||||
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
|
||||
# adopted by the external step, similar to tasks running in regular steps
|
||||
# LaunchParameters=ulimit_pam_adopt
|
||||
SlurmdDebug=debug5
|
||||
#DebugFlags=Protocol,Cgroup
|
||||
'';
|
||||
|
||||
extraCgroupConfig = ''
|
||||
CgroupPlugin=cgroup/v2
|
||||
#ConstrainCores=yes
|
||||
'';
|
||||
};
|
||||
|
||||
# Place the slurm config in /etc as this will be required by PAM
|
||||
environment.etc.slurm.source = config.services.slurm.etcSlurm;
|
||||
|
||||
age.secrets.mungeKey = {
|
||||
file = ../../secrets/munge-key.age;
|
||||
owner = "munge";
|
||||
|
@ -11,7 +11,7 @@ final: prev:
|
||||
paths = [ pmix.dev pmix.out ];
|
||||
};
|
||||
in prev.mpich.overrideAttrs (old: {
|
||||
patches = [
|
||||
patches = (old.patches or []) ++ [
|
||||
# See https://github.com/pmodels/mpich/issues/6946
|
||||
./mpich-fix-hwtopo.patch
|
||||
];
|
||||
@ -39,6 +39,18 @@ final: prev:
|
||||
# See https://bugs.schedmd.com/show_bug.cgi?id=19324
|
||||
./slurm-rank-expansion.patch
|
||||
];
|
||||
# Install also the pam_slurm_adopt library to restrict users from accessing
|
||||
# nodes with no job allocated.
|
||||
postBuild = (old.postBuild or "") + ''
|
||||
pushd contribs/pam_slurm_adopt
|
||||
make "PAM_DIR=$out/lib/security"
|
||||
popd
|
||||
'';
|
||||
postInstall = (old.postInstall or "") + ''
|
||||
pushd contribs/pam_slurm_adopt
|
||||
make "PAM_DIR=$out/lib/security" install
|
||||
popd
|
||||
'';
|
||||
});
|
||||
|
||||
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
|
||||
|
Binary file not shown.
@ -1,9 +1,9 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA
|
||||
Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs
|
||||
-> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8
|
||||
7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw
|
||||
-> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I
|
||||
GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU
|
||||
--- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8
|
||||
1¬Mw4‘Í ì:Hµ@Á/ägLtMÇ,ßÆ¥ô*¡žzñNV5ˆm‚ÍNŽoÞáj1$÷TøG_³E{Œ%“‰1ǯ‘<>H£îAÛp™
|
||||
-> ssh-ed25519 HY2yRg eRVX5yndWDLg9hw7sY1Iu8pJFy47luHvdL+zZGK2u1s
|
||||
e1nXXiMW0ywkZYh2s6c7/quGMfBOJOaRhNQDjCD2Iyc
|
||||
-> ssh-ed25519 CAWG4Q gYG7GRxRpJ0/5Wz0Z0J2wfLfkMFNmcy81dQEewM7gUA
|
||||
lamdUdx+xOFWF1lmUM4x9TT0cJtKu9Sp7w9JHwm13u0
|
||||
-> ssh-ed25519 MSF3dg HEzfpR8alG6WPzhaEjAmmjOFoFcMSQUldx46dBsXri4
|
||||
OAD5H/zZGhfevYrFJzJrbNKPomKZDOS9Qx5tmTp78Jo
|
||||
--- A0sMSiNXWaEIgRXR0x6UAIaluuVH6Zlv4CJ9sI0NXOw
|
||||
ÿú6çphóÎÆ{Ñ>®F|ÅiÃvâæE}{ìruÎâÆ·‹Ý°ËÍ}^»‰>ñc6¥´j÷ ùgèGW<47>Ã:—J3ù|ø|†ZÑ
|
11
secrets/gitlab-bsc-docker-token.age
Normal file
11
secrets/gitlab-bsc-docker-token.age
Normal file
@ -0,0 +1,11 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg WSdjyQPzBJ4JbzQpGeq1AAYpWKoXmLI1ZtmNmM5QOzs
|
||||
qGDlDT31DQF1DdHen0+5+52DdsQlabJdA2pOB5O1I6g
|
||||
-> ssh-ed25519 CAWG4Q wioWMDxQjN+d4JdIbCwZg0DLQu1OH2mV6gukRprjuAs
|
||||
670fE61hidOEh20hHiQAhP0+CjDF0WMBNzgwkGT8Yqg
|
||||
-> ssh-ed25519 MSF3dg DN19uvAEtqq4708P6HpuX9i/o/qAvHX6dj69dCF2H1o
|
||||
4Lu9GnjiFLMeXJ2C7aVPJsCHCQVlhylNWJi896Av92s
|
||||
--- 7cKBwOYNOUZ2h3/kAY09aSMASZSxX7hZIT4kvlIiT6w
|
||||
³6—çà•äfQF5=¦bX+‡v e`Ï7/øªA~PÎÖѦ7<15>Ì
|
||||
´ÖA÷)·h³ù=oZ¸$é^´V0ñ/Ü…µr
|
||||
k¸uœbĶ:R‘<52>>^gŒõ¼ik_*%<0B>a7ùKGæ<47>ÐÖçâ&PI¶£n
|
@ -1,9 +1,10 @@
|
||||
age-encryption.org/v1
|
||||
-> ssh-ed25519 HY2yRg WvKK6U1wQtx2pbUDfuaUIXTQiCulDkz7hgUCSwMfMzQ
|
||||
jLktUMqKuVxukqzz++pHOKvmucUQqeKYy5IwBma7KxY
|
||||
-> ssh-ed25519 CAWG4Q XKGuNNoYFl9bdZzsqYYTY7GsEt5sypLW4R+1uk78NmU
|
||||
8dIA2GzRAwTGM5CDHSM2BUBsbXzEAUssWUz2PY2PaTg
|
||||
-> ssh-ed25519 MSF3dg T630RsKuZIF/bp+KITnIIWWHsg6M/VQGqbWQZxqT+AA
|
||||
SraZcgZJVtmUzHF/XR9J7aK5t5EDNpkC/av/WJUT/G8
|
||||
--- /12G8pj9sbs591OM/ryhoLnSWWmzYcoqprk9uN/3g18
|
||||
ä·ù¼Â‡%å]yi"ô<>»LÓâùH`ªa$Æþ)¦9ve<76>.0úmÉK<EFBFBD>vƒÀïu"|1cÞ-%ÔÕ"åWFï¡ÞA«<41>hº$•ºj<eñ¶xÅLx«ç.?œÈâ:L…¬–ƒ,ëu»|³‹F|Õi²äÔ
|
||||
-> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A
|
||||
FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8
|
||||
-> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM
|
||||
Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo
|
||||
-> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4
|
||||
CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8
|
||||
--- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8
|
||||
û#ã¶H÷$°F;Ñéù%›È6êË2†¢rfXŸ\Dn ÖшºÈ‰©x™Î>¥Ù&;÷c‘UŠI=›ÑMöÀª?Tœ¡Ç¸ÂÂ"px†Ó\s‚ÙãbFý<46>ù¹WD¼{Ë
|
||||
AW>?U©ÙÊçÐHÔ³
|
Binary file not shown.
BIN
secrets/ipmi.yml.age
Normal file
BIN
secrets/ipmi.yml.age
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -9,8 +9,10 @@ in
|
||||
"gitea-runner-token.age".publicKeys = hut;
|
||||
"gitlab-runner-docker-token.age".publicKeys = hut;
|
||||
"gitlab-runner-shell-token.age".publicKeys = hut;
|
||||
"gitlab-bsc-docker-token.age".publicKeys = hut;
|
||||
"nix-serve.age".publicKeys = hut;
|
||||
"jungle-robot-password.age".publicKeys = hut;
|
||||
"ipmi.yml.age".publicKeys = hut;
|
||||
|
||||
"ceph-user.age".publicKeys = safe;
|
||||
"munge-key.age".publicKeys = safe;
|
||||
|
@ -11,7 +11,7 @@ access to the login machine using a resource petition in the BSC intranet.
|
||||
|
||||
Then, to request access to the machines we will need some information about you:
|
||||
|
||||
1. Which machines you want access to (hut, owl1, owl2, eudy, koro...)
|
||||
1. Which machines you want access to ([hut](/hut), [fox](/fox), owl1, owl2, eudy, koro...)
|
||||
1. Your user name and user id (to match the NFS permissions)
|
||||
1. Your real name and surname (for identification purposes)
|
||||
1. The salted hash of your login password, generated with `mkpasswd -m sha-512`
|
||||
|
10
web/content/doc/_index.md
Normal file
10
web/content/doc/_index.md
Normal file
@ -0,0 +1,10 @@
|
||||
---
|
||||
title: "Docs"
|
||||
description: "Documentation for users of jungle machines"
|
||||
date: 2023-09-15
|
||||
---
|
||||
|
||||
If this is the first time you use any of the jungle machines with NixOS, follow
|
||||
the [quick start guide](quickstart).
|
||||
|
||||
|
234
web/content/doc/quickstart.md
Normal file
234
web/content/doc/quickstart.md
Normal file
@ -0,0 +1,234 @@
|
||||
---
|
||||
title: "Quick start"
|
||||
date: 2023-09-15
|
||||
---
|
||||
|
||||
This documentation will guide you on how to build custom packages of software
|
||||
and use them in the jungle machines. It has been designed to reduce the friction
|
||||
from users coming from module systems.
|
||||
|
||||
You should be able to access the jungle machines, otherwise [request
|
||||
access](/access).
|
||||
|
||||
## Changes from other HPC machines
|
||||
|
||||
Users of other machines have been using the Lmod tool (module load ...) to add
|
||||
or remove programs from their environment, as well as manually building their
|
||||
own software for too many years.
|
||||
|
||||
While we cannot prevent users from continuing to use this tedious mechanism, we
|
||||
have designed the jungle machines to be much easier to operate by using the nix
|
||||
package manager.
|
||||
|
||||
### Freedom to install packages
|
||||
|
||||
When a user wanted to install a package, it was forced to either do it on its
|
||||
own directory, or request a system administrator to install it in a shared
|
||||
directory, so other users can also use that package.
|
||||
|
||||
This situation is gone, each user can install any package of software by
|
||||
themselves, without requiring any other authorization. When two users request
|
||||
the same package, the same copy will be provided.
|
||||
|
||||
A new package will be downloaded if it is available (someone already built it)
|
||||
or will be built from source on demand.
|
||||
|
||||
### No changes over time
|
||||
|
||||
All users retain the same versions of the packages they request until they
|
||||
decide to update them.
|
||||
|
||||
## Using nix to manage packages
|
||||
|
||||
In this chapter we show how to install packages and enter a development shell to
|
||||
build new programs from source. The examples are done from the hut machine,
|
||||
read [this page](/access) to request access.
|
||||
|
||||
### Installing binaries
|
||||
|
||||
To temporarily install new packages, use:
|
||||
|
||||
```text
|
||||
hut% nix shell jungle#gcc jungle#cowsay jungle#ovni
|
||||
```
|
||||
|
||||
Notice that the packages are described as two parts divided by the `#` symbol.
|
||||
The first part defines where to take the package from and the second part is
|
||||
the name of the package. For now we will use `jungle#<package>`. You can find
|
||||
many more packages here:
|
||||
|
||||
<https://search.nixos.org/packages>
|
||||
|
||||
You will now enter a new shell, where those requested package **binaries are
|
||||
available in $PATH**:
|
||||
|
||||
```text
|
||||
hut% cowsay hello world
|
||||
_____________
|
||||
< hello world >
|
||||
-------------
|
||||
\ ^__^
|
||||
\ (oo)\_______
|
||||
(__)\ )\/\
|
||||
||----w |
|
||||
|| ||
|
||||
|
||||
hut% ovniver
|
||||
LD_LIBRARY_PATH not set
|
||||
libovni: build v1.11.0 (a7103f8), dynamic v1.11.0 (a7103f8)
|
||||
|
||||
hut% gcc --version
|
||||
gcc (GCC) 13.3.0
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
This is free software; see the source for copying conditions. There is NO
|
||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
```
|
||||
|
||||
### Building programs
|
||||
|
||||
The above method only loads new binaries in the `$PATH`. If we try to build a
|
||||
program that includes headers or links with a library, it will fail to find
|
||||
them:
|
||||
|
||||
```text
|
||||
hut$ cat test.c
|
||||
#include <ovni.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
ovni_version_check();
|
||||
return 0;
|
||||
}
|
||||
hut% gcc test.c -lovni -o test
|
||||
test.c:1:10: fatal error: ovni.h: No such file or directory
|
||||
1 | #include <ovni.h>
|
||||
| ^~~~~~~~
|
||||
compilation terminated.
|
||||
```
|
||||
|
||||
We could manually add the full path to the ovni include directory with `-I` and
|
||||
the libraries with `-L`, but there is a tool that already perform these steps
|
||||
automatically for us, `nix develop`.
|
||||
|
||||
Let's go back to our original shell first, where those packages are not
|
||||
available anymore:
|
||||
|
||||
```
|
||||
hut% ps
|
||||
PID TTY TIME CMD
|
||||
2356260 pts/1 00:00:01 zsh
|
||||
2457268 pts/1 00:00:00 zsh
|
||||
2457297 pts/1 00:00:00 ps
|
||||
hut% exit
|
||||
hut% ovniver
|
||||
ovniver: command not found
|
||||
```
|
||||
|
||||
### Creating a flake.nix
|
||||
|
||||
To define which packages we want, we will write a small file that list them, a
|
||||
flake.nix file.
|
||||
|
||||
First, we will create a new directory where we are going to be working:
|
||||
|
||||
```
|
||||
hut% mkdir example
|
||||
hut% cd exmple
|
||||
```
|
||||
|
||||
Then place this flake.nix file:
|
||||
|
||||
```nix
|
||||
{
|
||||
inputs.jungle.url = "jungle";
|
||||
outputs = { self, jungle }:
|
||||
let
|
||||
pkgs = jungle.outputs.packages.x86_64-linux;
|
||||
in {
|
||||
devShells.x86_64-linux.default = pkgs.mkShell {
|
||||
pname = "devshell";
|
||||
buildInputs = with pkgs; [
|
||||
ovni gcc cowsay # more packages here...
|
||||
];
|
||||
};
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Now enter the shell with:
|
||||
|
||||
```
|
||||
hut% nix develop
|
||||
warning: creating lock file '/home/Computational/rarias/example/flake.lock':
|
||||
• Added input 'jungle':
|
||||
'path:/nix/store/27srv8haj6vv4ywrbmw0a8vds561m8rq-source?lastModified=1739479441&narHash=sha256-Kgjs8SO1w9NbPBu8ghwzCxYJ9kvWpoQOT%2BXwPvA9DcU%3D&rev=76396c0d67ef0cf32377d5c1894bb695293bca9d' (2025-02-13)
|
||||
• Added input 'jungle/agenix':
|
||||
'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41?narHash=sha256-b%2Buqzj%2BWa6xgMS9aNbX4I%2BsXeb5biPDi39VgvSFqFvU%3D' (2024-08-10)
|
||||
• Added input 'jungle/agenix/darwin':
|
||||
'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d?narHash=sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0%3D' (2023-11-24)
|
||||
• Added input 'jungle/agenix/darwin/nixpkgs':
|
||||
follows 'jungle/agenix/nixpkgs'
|
||||
• Added input 'jungle/agenix/home-manager':
|
||||
'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1?narHash=sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE%3D' (2023-12-20)
|
||||
• Added input 'jungle/agenix/home-manager/nixpkgs':
|
||||
follows 'jungle/agenix/nixpkgs'
|
||||
• Added input 'jungle/agenix/nixpkgs':
|
||||
follows 'jungle/nixpkgs'
|
||||
• Added input 'jungle/agenix/systems':
|
||||
'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e?narHash=sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768%3D' (2023-04-09)
|
||||
• Added input 'jungle/bscpkgs':
|
||||
'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29)
|
||||
• Added input 'jungle/bscpkgs/nixpkgs':
|
||||
follows 'jungle/nixpkgs'
|
||||
• Added input 'jungle/nixpkgs':
|
||||
'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc?narHash=sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8%3D' (2025-01-14)
|
||||
|
||||
hut$
|
||||
```
|
||||
|
||||
Notice that long list of messages is Nix creating a new flake.lock file with the
|
||||
current state of the packages. Next invocations will use the same packages as
|
||||
described by the lock file.
|
||||
|
||||
### Building a program from nix develop
|
||||
|
||||
Now let's try again building our test program:
|
||||
|
||||
```text
|
||||
hut$ cat test.c
|
||||
#include <ovni.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
ovni_version_check();
|
||||
return 0;
|
||||
}
|
||||
hut$ gcc test.c -o test -lovni
|
||||
hut$ ldd test
|
||||
linux-vdso.so.1 (0x00007ffff7fc4000)
|
||||
libovni.so.1 => /nix/store/sqk972akjv0q8dchn8ccjln2llzyyfd0-ovni-1.11.0/lib/libovni.so.1 (0x00007ffff7fab000)
|
||||
libc.so.6 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/libc.so.6 (0x00007ffff7db2000)
|
||||
/nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/ld-linux-x86-64.so.2 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib64/ld-linux-x86-64.so.2 (0x00007ffff7fc6000)
|
||||
hut$ ./test
|
||||
```
|
||||
|
||||
Now the ovni.h header and the libovni library are found and the program is
|
||||
successfully built, linked and executed.
|
||||
|
||||
You can add more packages as needed in your flake.nix:
|
||||
|
||||
```nix
|
||||
buildInputs = with pkgs; [
|
||||
ovni gcc cowsay # more packages here...
|
||||
];
|
||||
```
|
||||
|
||||
Make sure you exit the develop shell first, and then enter again with `nix
|
||||
develop`.
|
||||
|
||||
## Remember
|
||||
|
||||
- `nix shell` places binaries in the `$PATH`.
|
||||
- `nix develop` enters a development shell where both binaries and the libraries
|
||||
and includes are available so you can build new programs.
|
97
web/content/fox/_index.md
Normal file
97
web/content/fox/_index.md
Normal file
@ -0,0 +1,97 @@
|
||||
---
|
||||
title: "Fox"
|
||||
description: "AMD Genoa 9684X with 2 NVIDIA RTX4000 GPUs"
|
||||
date: 2025-02-12
|
||||
---
|
||||
|
||||

|
||||
|
||||
Picture by [Joanne Redwood](https://web.archive.org/web/20191109175146/https://www.inaturalist.org/photos/6568074),
|
||||
[CC0](http://creativecommons.org/publicdomain/zero/1.0/deed.en).
|
||||
|
||||
The *fox* machine is a big GPU server that is configured to run heavy workloads.
|
||||
It has two fast AMD CPUs with large cache and 2 reasonable NVIDIA GPUs. Here are
|
||||
the detailed specifications:
|
||||
|
||||
- 2x AMD GENOA X 9684X DP/UP 96C/192T 2.55G 1,150M 400W SP5 3D V-cach
|
||||
- 24x 32GB DDR5-4800 ECC RDIMM (total 768 GiB of RAM)
|
||||
- 1x 2.5" SSD SATA3 MICRON 5400 MAX 480GB
|
||||
- 2x 2.5" KIOXIA CM7-R 1.92TB NVMe GEN5 PCIe 5x4
|
||||
- 2x NVIDIA RTX4000 ADA Gen 20GB GDDR6 PCIe 4.0
|
||||
|
||||
## Access
|
||||
|
||||
To access the machine, request a SLURM session from [hut](/hut) using the `fox`
|
||||
partition:
|
||||
|
||||
hut% salloc -p fox
|
||||
|
||||
Then connect via ssh:
|
||||
|
||||
hut% ssh fox
|
||||
fox%
|
||||
|
||||
Follow [these steps](/access) if you don't have access to hut or fox.
|
||||
|
||||
## CUDA
|
||||
|
||||
To use CUDA, you can use the following `flake.nix` placed in a new directory to
|
||||
load all the required dependencies:
|
||||
|
||||
```nix
|
||||
{
|
||||
inputs.jungle.url = "jungle";
|
||||
|
||||
outputs = { jungle, ... }: {
|
||||
devShell.x86_64-linux = let
|
||||
pkgs = jungle.nixosConfigurations.fox.pkgs;
|
||||
in pkgs.mkShell {
|
||||
name = "cuda-env-shell";
|
||||
buildInputs = with pkgs; [
|
||||
git gitRepo gnupg autoconf curl
|
||||
procps gnumake util-linux m4 gperf unzip
|
||||
|
||||
# Cuda packages (more at https://search.nixos.org/packages)
|
||||
cudatoolkit linuxPackages.nvidia_x11
|
||||
cudaPackages.cuda_cudart.static
|
||||
cudaPackages.libcusparse
|
||||
|
||||
libGLU libGL
|
||||
xorg.libXi xorg.libXmu freeglut
|
||||
xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib
|
||||
ncurses5 stdenv.cc binutils
|
||||
];
|
||||
shellHook = ''
|
||||
export CUDA_PATH=${pkgs.cudatoolkit}
|
||||
export LD_LIBRARY_PATH=/var/run/opengl-driver/lib
|
||||
export SMS=50
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
Then just run `nix develop` from the same directory:
|
||||
|
||||
% mkdir cuda
|
||||
% cd cuda
|
||||
% vim flake.nix
|
||||
[...]
|
||||
% nix develop
|
||||
$ nvcc -V
|
||||
nvcc: NVIDIA (R) Cuda compiler driver
|
||||
Copyright (c) 2005-2024 NVIDIA Corporation
|
||||
Built on Tue_Feb_27_16:19:38_PST_2024
|
||||
Cuda compilation tools, release 12.4, V12.4.99
|
||||
Build cuda_12.4.r12.4/compiler.33961263_0
|
||||
|
||||
## Filesystems
|
||||
|
||||
The machine has several file systems available.
|
||||
|
||||
- `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity.
|
||||
Don't abuse.
|
||||
- `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high
|
||||
capacity. Stores three redundant copies of every file.
|
||||
- `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity.
|
||||
- `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot.
|
BIN
web/content/fox/fox.jpg
Normal file
BIN
web/content/fox/fox.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 126 KiB |
68
web/content/paste/_index.md
Normal file
68
web/content/paste/_index.md
Normal file
@ -0,0 +1,68 @@
|
||||
---
|
||||
title: "Paste"
|
||||
description: "Paste service"
|
||||
author: "Rodrigo Arias Mallo"
|
||||
date: 2024-09-20
|
||||
---
|
||||
|
||||
The hut machine provides a paste service using the program `p` (as in paste).
|
||||
|
||||
You can use it directly from the hut machine or remotely if you have [SSH
|
||||
access](/access) to hut using the following alias:
|
||||
|
||||
```
|
||||
alias p="ssh hut p"
|
||||
```
|
||||
|
||||
You can add it to bashrc or zshrc for persistent installation.
|
||||
|
||||
## Usage
|
||||
|
||||
The `p` command reads from the standard input, uploads the content to a file
|
||||
in the ceph filesystem and prints the URL to access it. It only accepts an
|
||||
optional argument, which is the extension of the file that will be stored on
|
||||
disk (without the dot). By default it uses the `txt` extension, so plain text
|
||||
can be read in the browser directly.
|
||||
|
||||
```
|
||||
p [extension]
|
||||
```
|
||||
|
||||
To remove files, go to `/ceph/p/$USER` and remove them manually.
|
||||
|
||||
## Examples
|
||||
|
||||
Share a text file, in this case the source of p itself:
|
||||
|
||||
```
|
||||
hut% p < m/hut/p.nix
|
||||
https://jungle.bsc.es/p/rarias/okbtG130.txt
|
||||
```
|
||||
|
||||
Paste the last dmesg lines directly from a pipe:
|
||||
|
||||
```
|
||||
hut% dmesg | tail -5 | p
|
||||
https://jungle.bsc.es/p/rarias/luX4STm9.txt
|
||||
```
|
||||
|
||||
Upload a PNG picture from a file:
|
||||
|
||||
```
|
||||
hop% p png < mark-api-cpu.png
|
||||
https://jungle.bsc.es/p/rarias/oSRAMVsE.png
|
||||
```
|
||||
|
||||
Take an screenshot and upload it as a PNG file:
|
||||
|
||||
```
|
||||
hop% scrot -s - | p png
|
||||
https://jungle.bsc.es/p/rarias/SOgK5EV0.png
|
||||
```
|
||||
|
||||
Upload a directory by creating a tar.gz file on the fly:
|
||||
|
||||
```
|
||||
hop% tar c ovni | gzip | p tar.gz
|
||||
https://jungle.bsc.es/p/rarias/tkwROcTR.tar.gz
|
||||
```
|
@ -3,26 +3,38 @@ languageCode = 'en-us'
|
||||
title = 'The jungle'
|
||||
theme = 'PaperMod'
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "doc"
|
||||
name = "Docs"
|
||||
url = "/doc/"
|
||||
weight = 10
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "grafana"
|
||||
name = "Grafana"
|
||||
url = "/grafana/"
|
||||
weight = 10
|
||||
weight = 20
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "Git"
|
||||
name = "Git"
|
||||
url = "/git/"
|
||||
weight = 20
|
||||
weight = 30
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "Lists"
|
||||
name = "Lists"
|
||||
url = "/lists/"
|
||||
weight = 30
|
||||
weight = 40
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "Paste"
|
||||
name = "Paste"
|
||||
url = "/paste/"
|
||||
weight = 50
|
||||
|
||||
[[menu.main]]
|
||||
identifier = "Posts"
|
||||
name = "Posts"
|
||||
url = "/posts/"
|
||||
weight = 40
|
||||
weight = 60
|
||||
|
@ -49,4 +49,4 @@
|
||||
{{ end }}{{ end }}
|
||||
|
||||
{{- /* Facebook Page Admin ID for Domain Insights */}}
|
||||
{{- with site.Social.facebook_admin }}<meta property="fb:admins" content="{{ . }}" />{{ end }}
|
||||
{{- with site.Params.Social.facebook_admin }}<meta property="fb:admins" content="{{ . }}" />{{ end }}
|
||||
|
@ -28,6 +28,6 @@
|
||||
{{- end }}
|
||||
<meta name="twitter:title" content="{{ .Title }}"/>
|
||||
<meta name="twitter:description" content="{{ with .Description }}{{ . }}{{ else }}{{if .IsPage}}{{ .Summary }}{{ else }}{{ with site.Params.description }}{{ . }}{{ end }}{{ end }}{{ end -}}"/>
|
||||
{{ with site.Social.twitter -}}
|
||||
{{ with site.Params.Social.twitter -}}
|
||||
<meta name="twitter:site" content="@{{ . }}"/>
|
||||
{{ end -}}
|
||||
|
Loading…
Reference in New Issue
Block a user