forked from rarias/jungle
		
	Compare commits
	
		
			6 Commits
		
	
	
		
			de9fedbe0d
			...
			3b11918af5
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 3b11918af5 | |||
| 58ce0e4445 | |||
| f940ad646b | |||
| 982fc647cf | |||
| 89fc9fb163 | |||
| 5d549df767 | 
| @ -25,7 +25,6 @@ in | |||||||
|       bay     = mkConf "bay"; |       bay     = mkConf "bay"; | ||||||
|       lake2   = mkConf "lake2"; |       lake2   = mkConf "lake2"; | ||||||
|       raccoon = mkConf "raccoon"; |       raccoon = mkConf "raccoon"; | ||||||
|       fox     = mkConf "fox"; |  | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { |     packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // { | ||||||
|  | |||||||
							
								
								
									
										3
									
								
								keys.nix
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								keys.nix
									
									
									
									
									
								
							| @ -9,11 +9,10 @@ rec { | |||||||
|     koro  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; |     koro  = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; | ||||||
|     bay   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; |     bay   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; | ||||||
|     lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; |     lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; | ||||||
|     fox   = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox"; |  | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|   hostGroup = with hosts; rec { |   hostGroup = with hosts; rec { | ||||||
|     compute    = [ owl1 owl2 fox ]; |     compute    = [ owl1 owl2 ]; | ||||||
|     playground = [ eudy koro ]; |     playground = [ eudy koro ]; | ||||||
|     storage    = [ bay lake2 ]; |     storage    = [ bay lake2 ]; | ||||||
|     monitor    = [ hut ]; |     monitor    = [ hut ]; | ||||||
|  | |||||||
| @ -68,7 +68,7 @@ | |||||||
|         home = "/home/Computational/anavarro"; |         home = "/home/Computational/anavarro"; | ||||||
|         description = "Antoni Navarro"; |         description = "Antoni Navarro"; | ||||||
|         group = "Computational"; |         group = "Computational"; | ||||||
|         hosts = [ "hut" "raccoon" "fox" ]; |         hosts = [ "hut" "raccoon" ]; | ||||||
|         hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; |         hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; | ||||||
|         openssh.authorizedKeys.keys = [ |         openssh.authorizedKeys.keys = [ | ||||||
|           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" |           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" | ||||||
| @ -81,7 +81,7 @@ | |||||||
|         home = "/home/Computational/abonerib"; |         home = "/home/Computational/abonerib"; | ||||||
|         description = "Aleix Boné"; |         description = "Aleix Boné"; | ||||||
|         group = "Computational"; |         group = "Computational"; | ||||||
|         hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ]; |         hosts = [ "owl1" "owl2" "hut" "raccoon" ]; | ||||||
|         hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; |         hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/"; | ||||||
|         openssh.authorizedKeys.keys = [ |         openssh.authorizedKeys.keys = [ | ||||||
|           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" |           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc" | ||||||
| @ -113,32 +113,6 @@ | |||||||
|           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" |           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791" | ||||||
|         ]; |         ]; | ||||||
|       }; |       }; | ||||||
| 
 |  | ||||||
|       dalvare1 = { |  | ||||||
|         uid = 2758; |  | ||||||
|         isNormalUser = true; |  | ||||||
|         home = "/home/Computational/dalvare1"; |  | ||||||
|         description = "David Álvarez"; |  | ||||||
|         group = "Computational"; |  | ||||||
|         hosts = [ "hut" "fox" ]; |  | ||||||
|         hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0"; |  | ||||||
|         openssh.authorizedKeys.keys = [ |  | ||||||
|           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead" |  | ||||||
|         ]; |  | ||||||
|       }; |  | ||||||
| 
 |  | ||||||
|       varcila = { |  | ||||||
|         uid = 5650; |  | ||||||
|         isNormalUser = true; |  | ||||||
|         home = "/home/Computational/varcila"; |  | ||||||
|         description = "Vincent Arcila"; |  | ||||||
|         group = "Computational"; |  | ||||||
|         hosts = [ "hut" "fox" ]; |  | ||||||
|         hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0"; |  | ||||||
|         openssh.authorizedKeys.keys = [ |  | ||||||
|           "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch" |  | ||||||
|         ]; |  | ||||||
|       }; |  | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     groups = { |     groups = { | ||||||
|  | |||||||
| @ -34,37 +34,37 @@ | |||||||
|       # Node Entry for node: mds01 (ID=72) |       # Node Entry for node: mds01 (ID=72) | ||||||
|       10.0.40.40              bay mds01 mds01-eth0 |       10.0.40.40              bay mds01 mds01-eth0 | ||||||
|       10.0.42.40              bay-ib mds01-ib0 |       10.0.42.40              bay-ib mds01-ib0 | ||||||
|       10.0.40.141             bay-ipmi mds01-ipmi0 mds01-ipmi |       10.0.40.141             bay-ipmi mds01-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: oss01 (ID=73) |       # Node Entry for node: oss01 (ID=73) | ||||||
|       10.0.40.41              oss01 oss01-eth0 |       10.0.40.41              oss01 oss01-eth0 | ||||||
|       10.0.42.41              oss01-ib0 |       10.0.42.41              oss01-ib0 | ||||||
|       10.0.40.142             oss01-ipmi0 oss01-ipmi |       10.0.40.142             oss01-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: oss02 (ID=74) |       # Node Entry for node: oss02 (ID=74) | ||||||
|       10.0.40.42              lake2 oss02 oss02-eth0 |       10.0.40.42              lake2 oss02 oss02-eth0 | ||||||
|       10.0.42.42              lake2-ib oss02-ib0 |       10.0.42.42              lake2-ib oss02-ib0 | ||||||
|       10.0.40.143             lake2-ipmi oss02-ipmi0 oss02-ipmi |       10.0.40.143             lake2-ipmi oss02-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon01 (ID=15) |       # Node Entry for node: xeon01 (ID=15) | ||||||
|       10.0.40.1               owl1 xeon01 xeon01-eth0 |       10.0.40.1               owl1 xeon01 xeon01-eth0 | ||||||
|       10.0.42.1               owl1-ib xeon01-ib0 |       10.0.42.1               owl1-ib xeon01-ib0 | ||||||
|       10.0.40.101             owl1-ipmi xeon01-ipmi0 xeon01-ipmi |       10.0.40.101             owl1-ipmi xeon01-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon02 (ID=16) |       # Node Entry for node: xeon02 (ID=16) | ||||||
|       10.0.40.2               owl2 xeon02 xeon02-eth0 |       10.0.40.2               owl2 xeon02 xeon02-eth0 | ||||||
|       10.0.42.2               owl2-ib xeon02-ib0 |       10.0.42.2               owl2-ib xeon02-ib0 | ||||||
|       10.0.40.102             owl2-ipmi xeon02-ipmi0 xeon02-ipmi |       10.0.40.102             owl2-ipmi xeon02-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon03 (ID=17) |       # Node Entry for node: xeon03 (ID=17) | ||||||
|       10.0.40.3               xeon03 xeon03-eth0 |       10.0.40.3               xeon03 xeon03-eth0 | ||||||
|       10.0.42.3               xeon03-ib0 |       10.0.42.3               xeon03-ib0 | ||||||
|       10.0.40.103             xeon03-ipmi0 xeon03-ipmi |       10.0.40.103             xeon03-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon04 (ID=18) |       # Node Entry for node: xeon04 (ID=18) | ||||||
|       10.0.40.4               xeon04 xeon04-eth0 |       10.0.40.4               xeon04 xeon04-eth0 | ||||||
|       10.0.42.4               xeon04-ib0 |       10.0.42.4               xeon04-ib0 | ||||||
|       10.0.40.104             xeon04-ipmi0 xeon04-ipmi |       10.0.40.104             xeon04-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon05 (ID=19) |       # Node Entry for node: xeon05 (ID=19) | ||||||
|       10.0.40.5               koro xeon05 xeon05-eth0 |       10.0.40.5               koro xeon05 xeon05-eth0 | ||||||
| @ -74,21 +74,17 @@ | |||||||
|       # Node Entry for node: xeon06 (ID=20) |       # Node Entry for node: xeon06 (ID=20) | ||||||
|       10.0.40.6               xeon06 xeon06-eth0 |       10.0.40.6               xeon06 xeon06-eth0 | ||||||
|       10.0.42.6               xeon06-ib0 |       10.0.42.6               xeon06-ib0 | ||||||
|       10.0.40.106             xeon06-ipmi0 xeon06-ipmi |       10.0.40.106             xeon06-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon07 (ID=21) |       # Node Entry for node: xeon07 (ID=21) | ||||||
|       10.0.40.7               hut xeon07 xeon07-eth0 |       10.0.40.7               hut xeon07 xeon07-eth0 | ||||||
|       10.0.42.7               hut-ib xeon07-ib0 |       10.0.42.7               hut-ib xeon07-ib0 | ||||||
|       10.0.40.107             hut-ipmi xeon07-ipmi0 xeon07-ipmi |       10.0.40.107             hut-ipmi xeon07-ipmi0 | ||||||
|        |        | ||||||
|       # Node Entry for node: xeon08 (ID=22) |       # Node Entry for node: xeon08 (ID=22) | ||||||
|       10.0.40.8               eudy xeon08 xeon08-eth0 |       10.0.40.8               eudy xeon08 xeon08-eth0 | ||||||
|       10.0.42.8               eudy-ib xeon08-ib0 |       10.0.42.8               eudy-ib xeon08-ib0 | ||||||
|       10.0.40.108             eudy-ipmi xeon08-ipmi0 xeon08-ipmi |       10.0.40.108             eudy-ipmi xeon08-ipmi0 | ||||||
| 
 |  | ||||||
|       # fox |  | ||||||
|       10.0.40.26              fox |  | ||||||
|       10.0.40.126             fox-ipmi |  | ||||||
|     ''; |     ''; | ||||||
|   }; |   }; | ||||||
| } | } | ||||||
|  | |||||||
| @ -1,75 +0,0 @@ | |||||||
| { lib, config, pkgs, ... }: |  | ||||||
| 
 |  | ||||||
| { |  | ||||||
|   imports = [ |  | ||||||
|     ../common/xeon.nix |  | ||||||
|     ../module/ceph.nix |  | ||||||
|     ../module/emulation.nix |  | ||||||
|     ../module/slurm-client.nix |  | ||||||
|     ../module/slurm-firewall.nix |  | ||||||
|   ]; |  | ||||||
| 
 |  | ||||||
|   # Select the this using the ID to avoid mismatches |  | ||||||
|   boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103"; |  | ||||||
| 
 |  | ||||||
|   # No swap, there is plenty of RAM |  | ||||||
|   swapDevices = lib.mkForce []; |  | ||||||
| 
 |  | ||||||
|   boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ]; |  | ||||||
|   boot.kernelModules = [ "kvm-amd" ]; |  | ||||||
| 
 |  | ||||||
|   hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; |  | ||||||
|   hardware.cpu.intel.updateMicrocode = lib.mkForce false; |  | ||||||
| 
 |  | ||||||
|   networking = { |  | ||||||
|     hostName = "fox"; |  | ||||||
|     interfaces.enp1s0f0np0.ipv4.addresses = [ { |  | ||||||
|       address = "10.0.40.26"; |  | ||||||
|       prefixLength = 24; |  | ||||||
|     } ]; |  | ||||||
|   }; |  | ||||||
| 
 |  | ||||||
|   # Configure Nvidia driver to use with CUDA |  | ||||||
|   hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; |  | ||||||
|   hardware.graphics.enable = true; |  | ||||||
|   nixpkgs.config.allowUnfree = true; |  | ||||||
|   nixpkgs.config.nvidia.acceptLicense = true; |  | ||||||
|   services.xserver.videoDrivers = [ "nvidia" ]; |  | ||||||
| 
 |  | ||||||
|   # Mount NVME disks |  | ||||||
|   fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; }; |  | ||||||
|   fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; }; |  | ||||||
| 
 |  | ||||||
|   # Make a /nvme{0,1}/$USER directory for each user. |  | ||||||
|   systemd.services.create-nvme-dirs = let |  | ||||||
|     # Take only normal users in fox |  | ||||||
|     users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users; |  | ||||||
|     commands = lib.concatLists (lib.mapAttrsToList |  | ||||||
|       (_: user: [ |  | ||||||
|         "install -d -o ${user.name} -g ${user.group} -m 0755 /nvme{0,1}/${user.name}" |  | ||||||
|       ]) users); |  | ||||||
|     script = pkgs.writeShellScript "create-nvme-dirs.sh" (lib.concatLines commands); |  | ||||||
|   in { |  | ||||||
|     enable = true; |  | ||||||
|     wants = [ "local-fs.target" ]; |  | ||||||
|     after = [ "local-fs.target" ]; |  | ||||||
|     wantedBy = [ "multi-user.target" ]; |  | ||||||
|     serviceConfig.ExecStart = script; |  | ||||||
|   }; |  | ||||||
| 
 |  | ||||||
|   # Only allow SSH connections from users who have a SLURM allocation |  | ||||||
|   # See: https://slurm.schedmd.com/pam_slurm_adopt.html |  | ||||||
|   security.pam.services.sshd.rules.account.slurm = { |  | ||||||
|     control = "required"; |  | ||||||
|     enable = true; |  | ||||||
|     modulePath = "${pkgs.slurm}/lib/security/pam_slurm_adopt.so"; |  | ||||||
|     args = [ "log_level=debug5" ]; |  | ||||||
|     order = 999999; # Make it last one |  | ||||||
|   }; |  | ||||||
| 
 |  | ||||||
|   # Disable systemd session (pam_systemd.so) as it will conflict with the |  | ||||||
|   # pam_slurm_adopt.so module. What happens is that the shell is first adopted |  | ||||||
|   # into the slurmstepd task and then into the systemd session, which is not |  | ||||||
|   # what we want, otherwise it will linger even if all jobs are gone. |  | ||||||
|   security.pam.services.sshd.startSession = lib.mkForce false; |  | ||||||
| } |  | ||||||
| @ -1,9 +1,8 @@ | |||||||
| { pkgs, lib, config, ... }: | { pkgs, lib, config, ... }: | ||||||
| 
 | 
 | ||||||
| { | { | ||||||
|   age.secrets.gitlab-pm-shell.file = ../../secrets/gitlab-runner-shell-token.age; |   age.secrets.gitlabRunnerShellToken.file = ../../secrets/gitlab-runner-shell-token.age; | ||||||
|   age.secrets.gitlab-pm-docker.file = ../../secrets/gitlab-runner-docker-token.age; |   age.secrets.gitlabRunnerDockerToken.file = ../../secrets/gitlab-runner-docker-token.age; | ||||||
|   age.secrets.gitlab-bsc-docker.file = ../../secrets/gitlab-bsc-docker-token.age; |  | ||||||
| 
 | 
 | ||||||
|   services.gitlab-runner = { |   services.gitlab-runner = { | ||||||
|     enable = true; |     enable = true; | ||||||
| @ -22,88 +21,20 @@ | |||||||
|           "--docker-network-mode host" |           "--docker-network-mode host" | ||||||
|         ]; |         ]; | ||||||
|         environmentVariables = { |         environmentVariables = { | ||||||
|           https_proxy = "http://hut:23080"; |           https_proxy = "http://localhost:23080"; | ||||||
|           http_proxy = "http://hut:23080"; |           http_proxy = "http://localhost:23080"; | ||||||
|         }; |         }; | ||||||
|       }; |       }; | ||||||
|     in { |     in { | ||||||
|       # For pm.bsc.es/gitlab |       # For pm.bsc.es/gitlab | ||||||
|       gitlab-pm-shell = common-shell // { |       gitlab-pm-shell = common-shell // { | ||||||
|         authenticationTokenConfigFile = config.age.secrets.gitlab-pm-shell.path; |         authenticationTokenConfigFile = config.age.secrets.gitlabRunnerShellToken.path; | ||||||
|       }; |       }; | ||||||
|       gitlab-pm-docker = common-docker // { |       gitlab-pm-docker = common-docker // { | ||||||
|         authenticationTokenConfigFile = config.age.secrets.gitlab-pm-docker.path; |         authenticationTokenConfigFile = config.age.secrets.gitlabRunnerDockerToken.path; | ||||||
|       }; |  | ||||||
| 
 |  | ||||||
|       gitlab-bsc-docker = { |  | ||||||
|         # gitlab.bsc.es still uses the old token mechanism |  | ||||||
|         registrationConfigFile = config.age.secrets.gitlab-bsc-docker.path; |  | ||||||
|         tagList = [ "docker" "hut" ]; |  | ||||||
|         environmentVariables = { |  | ||||||
|           # We cannot access the hut local interface from docker, so we connect |  | ||||||
|           # to hut directly via the ethernet one. |  | ||||||
|           https_proxy = "http://hut:23080"; |  | ||||||
|           http_proxy = "http://hut:23080"; |  | ||||||
|         }; |  | ||||||
|         executor = "docker"; |  | ||||||
|         dockerImage = "alpine"; |  | ||||||
|         dockerVolumes = [ |  | ||||||
|           "/nix/store:/nix/store:ro" |  | ||||||
|           "/nix/var/nix/db:/nix/var/nix/db:ro" |  | ||||||
|           "/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro" |  | ||||||
|         ]; |  | ||||||
|         dockerExtraHosts = [ |  | ||||||
|           # Required to pass the proxy via hut |  | ||||||
|           "hut:10.0.40.7" |  | ||||||
|         ]; |  | ||||||
|         dockerDisableCache = true; |  | ||||||
|         registrationFlags = [ |  | ||||||
|           # Increase build log length to 64 MiB |  | ||||||
|           "--output-limit 65536" |  | ||||||
|         ]; |  | ||||||
|         preBuildScript = pkgs.writeScript "setup-container" '' |  | ||||||
|           mkdir -p -m 0755 /nix/var/log/nix/drvs |  | ||||||
|           mkdir -p -m 0755 /nix/var/nix/gcroots |  | ||||||
|           mkdir -p -m 0755 /nix/var/nix/profiles |  | ||||||
|           mkdir -p -m 0755 /nix/var/nix/temproots |  | ||||||
|           mkdir -p -m 0755 /nix/var/nix/userpool |  | ||||||
|           mkdir -p -m 1777 /nix/var/nix/gcroots/per-user |  | ||||||
|           mkdir -p -m 1777 /nix/var/nix/profiles/per-user |  | ||||||
|           mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root |  | ||||||
|           mkdir -p -m 0700 "$HOME/.nix-defexpr" |  | ||||||
|           mkdir -p -m 0700 "$HOME/.ssh" |  | ||||||
|           cat > "$HOME/.ssh/config" << EOF |  | ||||||
|           Host bscpm04.bsc.es gitlab-internal.bsc.es |  | ||||||
|             User git |  | ||||||
|             ProxyCommand nc -X connect -x hut:23080 %h %p |  | ||||||
|           Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es |  | ||||||
|             ProxyCommand nc -X connect -x hut:23080 %h %p |  | ||||||
|           EOF |  | ||||||
|           cat >> "$HOME/.ssh/known_hosts" << EOF |  | ||||||
|           bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT |  | ||||||
|           gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3 |  | ||||||
|           EOF |  | ||||||
|           . ${pkgs.nix}/etc/profile.d/nix-daemon.sh |  | ||||||
|           # Required to load SSL certificate paths |  | ||||||
|           . ${pkgs.cacert}/nix-support/setup-hook |  | ||||||
|         ''; |  | ||||||
|         environmentVariables = { |  | ||||||
|           ENV = "/etc/profile"; |  | ||||||
|           USER = "root"; |  | ||||||
|           NIX_REMOTE = "daemon"; |  | ||||||
|           PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin"; |  | ||||||
|       }; |       }; | ||||||
|     }; |     }; | ||||||
|   }; |   }; | ||||||
|   }; |  | ||||||
| 
 |  | ||||||
|   # DOCKER* chains are useless, override at FORWARD |  | ||||||
|   networking.firewall.extraCommands = '' |  | ||||||
|     # Allow docker to use our proxy |  | ||||||
|     iptables -I FORWARD 1 -p tcp -i docker0 -d hut --dport 23080 -j nixos-fw-accept |  | ||||||
|     # Block anything else coming from docker |  | ||||||
|     iptables -I FORWARD 2 -p all -i docker0 -j nixos-fw-log-refuse |  | ||||||
|   ''; |  | ||||||
| 
 | 
 | ||||||
|   #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; |   #systemd.services.gitlab-runner.serviceConfig.Shell = "${pkgs.bash}/bin/bash"; | ||||||
|   systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; |   systemd.services.gitlab-runner.serviceConfig.DynamicUser = lib.mkForce false; | ||||||
|  | |||||||
							
								
								
									
										13
									
								
								m/hut/ipmi.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								m/hut/ipmi.yml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | |||||||
|  | modules: | ||||||
|  |         default: | ||||||
|  |                 collectors: | ||||||
|  |                 - bmc | ||||||
|  |                 - ipmi | ||||||
|  |                 - chassis | ||||||
|  | 
 | ||||||
|  |         lan: | ||||||
|  |                 collectors: | ||||||
|  |                 - ipmi | ||||||
|  |                 - chassis | ||||||
|  |                 user: "" | ||||||
|  |                 pass: "" | ||||||
| @ -12,8 +12,6 @@ | |||||||
|     mode = "400"; |     mode = "400"; | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|   age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age; |  | ||||||
| 
 |  | ||||||
|   services.grafana = { |   services.grafana = { | ||||||
|     enable = true; |     enable = true; | ||||||
|     settings = { |     settings = { | ||||||
| @ -75,7 +73,7 @@ | |||||||
|         enable = true; |         enable = true; | ||||||
|         group = "root"; |         group = "root"; | ||||||
|         user = "root"; |         user = "root"; | ||||||
|         configFile = config.age.secrets.ipmiYml.path; |         configFile = ./ipmi.yml; | ||||||
|         #extraFlags = [ "--log.level=debug" ]; |         #extraFlags = [ "--log.level=debug" ]; | ||||||
|         listenAddress = "127.0.0.1"; |         listenAddress = "127.0.0.1"; | ||||||
|       }; |       }; | ||||||
| @ -208,7 +206,7 @@ | |||||||
|             # Sets the "instance" label with the remote host we are querying |             # Sets the "instance" label with the remote host we are querying | ||||||
|             source_labels = [ "__param_target" ]; |             source_labels = [ "__param_target" ]; | ||||||
|             separator = ";"; |             separator = ";"; | ||||||
|             regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end |             regex = "(.*)"; | ||||||
|             target_label = "instance"; |             target_label = "instance"; | ||||||
|             replacement = "\${1}"; |             replacement = "\${1}"; | ||||||
|             action = "replace"; |             action = "replace"; | ||||||
| @ -250,17 +248,6 @@ | |||||||
|           module = [ "raccoon" ]; |           module = [ "raccoon" ]; | ||||||
|         }; |         }; | ||||||
|       } |       } | ||||||
|       { |  | ||||||
|         job_name = "ipmi-fox"; |  | ||||||
|         metrics_path = "/ipmi"; |  | ||||||
|         static_configs = [ |  | ||||||
|           { targets = [ "127.0.0.1:9290" ]; } |  | ||||||
|         ]; |  | ||||||
|         params = { |  | ||||||
|           target = [ "fox-ipmi" ]; |  | ||||||
|           module = [ "fox" ]; |  | ||||||
|         }; |  | ||||||
|       } |  | ||||||
|     ]; |     ]; | ||||||
|   }; |   }; | ||||||
| } | } | ||||||
|  | |||||||
| @ -12,8 +12,6 @@ let | |||||||
|     installPhase = '' |     installPhase = '' | ||||||
|       cp -r public $out |       cp -r public $out | ||||||
|     ''; |     ''; | ||||||
|     # Don't mess doc/ |  | ||||||
|     dontFixup = true; |  | ||||||
|   }; |   }; | ||||||
| in | in | ||||||
| { | { | ||||||
|  | |||||||
| @ -1,15 +1,15 @@ | |||||||
| - targets: | - targets: | ||||||
|   - owl1-ipmi |   - 10.0.40.101 | ||||||
|   - owl2-ipmi |   - 10.0.40.102 | ||||||
|   - xeon03-ipmi |   - 10.0.40.103 | ||||||
|   - xeon04-ipmi |   - 10.0.40.104 | ||||||
|   - koro-ipmi |   - 10.0.40.105 | ||||||
|   - xeon06-ipmi |   - 10.0.40.106 | ||||||
|   - hut-ipmi |   - 10.0.40.107 | ||||||
|   - eudy-ipmi |   - 10.0.40.108 | ||||||
|   # Storage |   # Storage | ||||||
|   - bay-ipmi |   - 10.0.40.141 | ||||||
|   - oss01-ipmi |   - 10.0.40.142 | ||||||
|   - lake2-ipmi |   - 10.0.40.143 | ||||||
|   labels: |   labels: | ||||||
|     job: ipmi-lan |     job: ipmi-lan | ||||||
|  | |||||||
| @ -43,13 +43,12 @@ in { | |||||||
|     clusterName = "jungle"; |     clusterName = "jungle"; | ||||||
|     nodeName = [ |     nodeName = [ | ||||||
|       "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" |       "owl[1,2]  Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" | ||||||
|       "fox       Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox" |  | ||||||
|       "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" |       "hut       Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" | ||||||
|     ]; |     ]; | ||||||
| 
 | 
 | ||||||
|     partitionName = [ |     partitionName = [ | ||||||
|       "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" |       "owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP" | ||||||
|       "fox Nodes=fox          Default=NO  DefaultTime=01:00:00 MaxTime=INFINITE State=UP" |       "all Nodes=owl[1-2],hut Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP" | ||||||
|     ]; |     ]; | ||||||
| 
 | 
 | ||||||
|     # See slurm.conf(5) for more details about these options. |     # See slurm.conf(5) for more details about these options. | ||||||
| @ -77,7 +76,7 @@ in { | |||||||
|       SuspendTimeout=60 |       SuspendTimeout=60 | ||||||
|       ResumeProgram=${resumeProgram} |       ResumeProgram=${resumeProgram} | ||||||
|       ResumeTimeout=300 |       ResumeTimeout=300 | ||||||
|       SuspendExcNodes=hut,fox |       SuspendExcNodes=hut | ||||||
| 
 | 
 | ||||||
|       # Turn the nodes off after 1 hour of inactivity |       # Turn the nodes off after 1 hour of inactivity | ||||||
|       SuspendTime=3600 |       SuspendTime=3600 | ||||||
| @ -92,29 +91,9 @@ in { | |||||||
|       # Ignore memory constraints and only use unused cores to share a node with |       # Ignore memory constraints and only use unused cores to share a node with | ||||||
|       # other jobs. |       # other jobs. | ||||||
|       SelectTypeParameters=CR_Core |       SelectTypeParameters=CR_Core | ||||||
| 
 |  | ||||||
|       # Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html |  | ||||||
|       # This sets up the "extern" step into which ssh-launched processes will be |  | ||||||
|       # adopted. Alloc runs the prolog at job allocation (salloc) rather than |  | ||||||
|       # when a task runs (srun) so we can ssh early. |  | ||||||
|       PrologFlags=Alloc,Contain,X11 |  | ||||||
| 
 |  | ||||||
|       # LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes |  | ||||||
|       # adopted by the external step, similar to tasks running in regular steps |  | ||||||
|       # LaunchParameters=ulimit_pam_adopt |  | ||||||
|       SlurmdDebug=debug5 |  | ||||||
|       #DebugFlags=Protocol,Cgroup |  | ||||||
|     ''; |  | ||||||
| 
 |  | ||||||
|     extraCgroupConfig = '' |  | ||||||
|       CgroupPlugin=cgroup/v2 |  | ||||||
|       #ConstrainCores=yes |  | ||||||
|     ''; |     ''; | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|   # Place the slurm config in /etc as this will be required by PAM |  | ||||||
|   environment.etc.slurm.source = config.services.slurm.etcSlurm; |  | ||||||
| 
 |  | ||||||
|   age.secrets.mungeKey = { |   age.secrets.mungeKey = { | ||||||
|     file = ../../secrets/munge-key.age; |     file = ../../secrets/munge-key.age; | ||||||
|     owner = "munge"; |     owner = "munge"; | ||||||
|  | |||||||
| @ -39,18 +39,6 @@ final: prev: | |||||||
|       # See https://bugs.schedmd.com/show_bug.cgi?id=19324 |       # See https://bugs.schedmd.com/show_bug.cgi?id=19324 | ||||||
|       ./slurm-rank-expansion.patch |       ./slurm-rank-expansion.patch | ||||||
|     ]; |     ]; | ||||||
|     # Install also the pam_slurm_adopt library to restrict users from accessing |  | ||||||
|     # nodes with no job allocated. |  | ||||||
|     postBuild = (old.postBuild or "") + '' |  | ||||||
|       pushd contribs/pam_slurm_adopt |  | ||||||
|         make "PAM_DIR=$out/lib/security" |  | ||||||
|       popd |  | ||||||
|     ''; |  | ||||||
|     postInstall = (old.postInstall or "") + '' |  | ||||||
|       pushd contribs/pam_slurm_adopt |  | ||||||
|         make "PAM_DIR=$out/lib/security" install |  | ||||||
|       popd |  | ||||||
|     ''; |  | ||||||
|   }); |   }); | ||||||
| 
 | 
 | ||||||
|   prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; |   prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; | ||||||
|  | |||||||
										
											Binary file not shown.
										
									
								
							| @ -1,9 +1,9 @@ | |||||||
| age-encryption.org/v1 | age-encryption.org/v1 | ||||||
| -> ssh-ed25519 HY2yRg eRVX5yndWDLg9hw7sY1Iu8pJFy47luHvdL+zZGK2u1s | -> ssh-ed25519 HY2yRg DQdgCk16Yu524BsrWVf0krnwWzDM6SeaJCgQipOfwCA | ||||||
| e1nXXiMW0ywkZYh2s6c7/quGMfBOJOaRhNQDjCD2Iyc | Ab9ocqra/UWJZI+QGMlxUhBu5AzqfjPgXl+ENIiHYGs | ||||||
| -> ssh-ed25519 CAWG4Q gYG7GRxRpJ0/5Wz0Z0J2wfLfkMFNmcy81dQEewM7gUA | -> ssh-ed25519 CAWG4Q KF9rGCenb3nf+wyz2hyVs/EUEbsmUs5R+1fBxlCibC8 | ||||||
| lamdUdx+xOFWF1lmUM4x9TT0cJtKu9Sp7w9JHwm13u0 | 7++Kxbr3FHVdVfnFdHYdAuR0Tgfd+sRcO6WRss6LhEw | ||||||
| -> ssh-ed25519 MSF3dg HEzfpR8alG6WPzhaEjAmmjOFoFcMSQUldx46dBsXri4 | -> ssh-ed25519 MSF3dg aUe4DhRsu4X8CFOEAnD/XM/o/0qHYSB522woCaAVh0I | ||||||
| OAD5H/zZGhfevYrFJzJrbNKPomKZDOS9Qx5tmTp78Jo | GRcs5cm2YqA/lGhUtbpboBaz7mfgiLaCr+agaB7vACU | ||||||
| --- A0sMSiNXWaEIgRXR0x6UAIaluuVH6Zlv4CJ9sI0NXOw | --- 9Q7Ou+Pxq+3RZilCb2dKC/pCFjZEt4rp5KnTUUU7WJ8 | ||||||
| ÿú6çphóÎÆ{Ñ>®F|ÅiÃvâæE}{ìruÎâÆ·‹Ý°ËÍ}^»‰>ñc6¥´j÷	ùgèGW<47>Ã:—J3ù|ø|†ZÑ | 1¬Mw4‘Í	ì:Hµ@Á/ägLtMÇ,߯¥ô*¡žzñNV5ˆm‚ÍNŽoÞáj1$÷TøG_³E{Œ%“‰1ǯ‘<>H£îAÛp™ | ||||||
| @ -1,11 +0,0 @@ | |||||||
| age-encryption.org/v1 |  | ||||||
| -> ssh-ed25519 HY2yRg WSdjyQPzBJ4JbzQpGeq1AAYpWKoXmLI1ZtmNmM5QOzs |  | ||||||
| qGDlDT31DQF1DdHen0+5+52DdsQlabJdA2pOB5O1I6g |  | ||||||
| -> ssh-ed25519 CAWG4Q wioWMDxQjN+d4JdIbCwZg0DLQu1OH2mV6gukRprjuAs |  | ||||||
| 670fE61hidOEh20hHiQAhP0+CjDF0WMBNzgwkGT8Yqg |  | ||||||
| -> ssh-ed25519 MSF3dg DN19uvAEtqq4708P6HpuX9i/o/qAvHX6dj69dCF2H1o |  | ||||||
| 4Lu9GnjiFLMeXJ2C7aVPJsCHCQVlhylNWJi896Av92s |  | ||||||
| --- 7cKBwOYNOUZ2h3/kAY09aSMASZSxX7hZIT4kvlIiT6w |  | ||||||
| ³6—çà•äfQF5=¦bX+‡v e`Ï7/øªA~PÎÖѦ7<15>Ì |  | ||||||
| ´ÖA÷)·h³ù=oZ¸$é^´V0ñ/Ü…µr |  | ||||||
| k¸uœbĶ:R‘<52>>^gŒõ¼ik_*%<0B>a7ùKGæ<47>ÐÖçâ&PI¶£n |  | ||||||
| @ -1,10 +1,9 @@ | |||||||
| age-encryption.org/v1 | age-encryption.org/v1 | ||||||
| -> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A | -> ssh-ed25519 HY2yRg 0sEIUEJBJQ0k0rBfHaOEbq1pNBqsPin4Xq85v0ds9jY | ||||||
| FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8 | 4wzjLapoOcq53nT2K3hSGED4jTDXci25GLHkl/fL4EI | ||||||
| -> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM | -> ssh-ed25519 CAWG4Q f68ZbJGwXuCZVnqhwbh+8dh0X/MCdjEd+sVtPyBu/hU | ||||||
| Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo | u2TQreyWQvP6PGuwuUNKA/AL68560flqSlaItN3k41I | ||||||
| -> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4 | -> ssh-ed25519 MSF3dg HdrtRW2j7mfkLH0/4aJK5R0cWdjf56HYtEZgzHi9EAs | ||||||
| CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8 | A6MF6tXmSUq2RF2bpmav0GFTRERwluSZGh2snP/KqkA | ||||||
| --- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8 | --- drsezqi7J/g8gm6N10SkfeAWnYct99WUraB5djLJqpo | ||||||
| û#ã¶H÷$°F;Ñéù%›È6êË2†¢rfXŸ\Dn ÖшºÈ‰©x™Î>¥Ù&;÷c‘UŠI=›ÑMöÀª?Tœ¡Ç¸ÂÂ"px†Ó\s‚ÙãbFý<46>ù¹WD¼{Ë | gÔ
(ìÐJ!M6¬É3e¸AÜæÃ?\1y÷eüFN\‘<>/MêòªN`K^€+"¤«Y^å>dÒH÷°‡¸†]P…ÓûJ‘`xôã»{Ú±ô„y°ÅÎøSˆéyPX{w‰Sï
ž^5X¶JPô;v‰ | ||||||
| AW>?U©ÙÊçÐHÔ³ |  | ||||||
| @ -1,9 +1,9 @@ | |||||||
| age-encryption.org/v1 | age-encryption.org/v1 | ||||||
| -> ssh-ed25519 HY2yRg xWRxJGWSzA5aplRYCYLB6aBwrUrQQJ2MtDYaD75V5nI | -> ssh-ed25519 HY2yRg VY8s9s1zuHOv2axmIacwKg2ozsJnskHTQtslRZ3YI1M | ||||||
| J07XF3NQiaYKKKNRcNWi9MloJD2wXHd+2K7bo6lF+QU | fKkJuydLOzF/ciPYSYu4ziSCozdl6sowvDMYZmxqmHY | ||||||
| -> ssh-ed25519 CAWG4Q jNWymbyCczcm8RcaIEbFQBlOMALsuxTl4+pLUi0aR20 | -> ssh-ed25519 CAWG4Q 2ARFd/7RWQ/QOk47FnJFChaVBgoV4LE6EA+JHezkXgg | ||||||
| z5NixlrRD+Y7Z/aFPs6hiDW4/lp8CBQCeJYpbuG9yYM | MV4g4Llv8Qcd/wUgJyoNG5AXb6o3aFTrOYGC+lXlSzw | ||||||
| -> ssh-ed25519 MSF3dg QsUQloEKN3k1G49FQnNR/Do6ILgGpjFcw3zu5kk1Ako | -> ssh-ed25519 MSF3dg SKoxWe8Mi8EkBjkESxStOCI5V4C0KYEXIOx7OdENgTA | ||||||
| IHwyFWUEWqCStNcFprnpBa8L5J6zKIsn+7HcgGRv3sM | p/owKwQ4e4pcGV+hqej2AfPU5QaM2i8VfxhlkjCM4Z4 | ||||||
| --- oUia0fsL6opeYWACyXtHAu/Ld+bUIt/7S1VszYTvwgU | --- 0VWKU5CQiGbiOtQ2tsZZg88oZm1qcUDEnU5zDTtV+KU | ||||||
| ™êVäœ*øtë2-Ÿ7·œ–Ž“§hÜ&‰éÍ¢_!Õ¿+”·±¯(‚ã¡nù¿	¬í(Ëê÷/}òœäáCúNÍ·|ÇNèuÎ5‰Ã¹å‹šKÀìlÆ"ÃØklOX¨yº÷æØàù¤¹ø²Aíõe„È$ | ŸÖuµcl÷ª`Ÿ¡Mþ¸'Vk6Yè!Ó=¦LÀ¦yš-ž¬ÁO¢Az«Æ˜VEK¦<4B>‚R†_ÌqL|1V•[)²qœ©„Æ“Lç<4C>DyÌÉ0¹_áßåq)-T,ƪú_9û ”?å<>àûib†1 | ||||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -9,10 +9,8 @@ in | |||||||
|   "gitea-runner-token.age".publicKeys = hut; |   "gitea-runner-token.age".publicKeys = hut; | ||||||
|   "gitlab-runner-docker-token.age".publicKeys = hut; |   "gitlab-runner-docker-token.age".publicKeys = hut; | ||||||
|   "gitlab-runner-shell-token.age".publicKeys = hut; |   "gitlab-runner-shell-token.age".publicKeys = hut; | ||||||
|   "gitlab-bsc-docker-token.age".publicKeys = hut; |  | ||||||
|   "nix-serve.age".publicKeys = hut; |   "nix-serve.age".publicKeys = hut; | ||||||
|   "jungle-robot-password.age".publicKeys = hut; |   "jungle-robot-password.age".publicKeys = hut; | ||||||
|   "ipmi.yml.age".publicKeys = hut; |  | ||||||
| 
 | 
 | ||||||
|   "ceph-user.age".publicKeys = safe; |   "ceph-user.age".publicKeys = safe; | ||||||
|   "munge-key.age".publicKeys = safe; |   "munge-key.age".publicKeys = safe; | ||||||
|  | |||||||
| @ -11,7 +11,7 @@ access to the login machine using a resource petition in the BSC intranet. | |||||||
| 
 | 
 | ||||||
| Then, to request access to the machines we will need some information about you: | Then, to request access to the machines we will need some information about you: | ||||||
| 
 | 
 | ||||||
| 1. Which machines you want access to ([hut](/hut), [fox](/fox), owl1, owl2, eudy, koro...) | 1. Which machines you want access to (hut, owl1, owl2, eudy, koro...) | ||||||
| 1. Your user name and user id (to match the NFS permissions) | 1. Your user name and user id (to match the NFS permissions) | ||||||
| 1. Your real name and surname (for identification purposes) | 1. Your real name and surname (for identification purposes) | ||||||
| 1. The salted hash of your login password, generated with `mkpasswd -m sha-512` | 1. The salted hash of your login password, generated with `mkpasswd -m sha-512` | ||||||
|  | |||||||
| @ -1,10 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Docs" |  | ||||||
| description: "Documentation for users of jungle machines" |  | ||||||
| date: 2023-09-15 |  | ||||||
| --- |  | ||||||
| 
 |  | ||||||
| If this is the first time you use any of the jungle machines with NixOS, follow |  | ||||||
| the [quick start guide](quickstart). |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @ -1,234 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Quick start" |  | ||||||
| date: 2023-09-15 |  | ||||||
| --- |  | ||||||
| 
 |  | ||||||
| This documentation will guide you on how to build custom packages of software |  | ||||||
| and use them in the jungle machines. It has been designed to reduce the friction |  | ||||||
| from users coming from module systems. |  | ||||||
| 
 |  | ||||||
| You should be able to access the jungle machines, otherwise [request |  | ||||||
| access](/access). |  | ||||||
| 
 |  | ||||||
| ## Changes from other HPC machines |  | ||||||
| 
 |  | ||||||
| Users of other machines have been using the Lmod tool (module load ...) to add |  | ||||||
| or remove programs from their environment, as well as manually building their |  | ||||||
| own software for too many years. |  | ||||||
| 
 |  | ||||||
| While we cannot prevent users from continuing to use this tedious mechanism, we |  | ||||||
| have designed the jungle machines to be much easier to operate by using the nix |  | ||||||
| package manager. |  | ||||||
| 
 |  | ||||||
| ### Freedom to install packages |  | ||||||
| 
 |  | ||||||
| When a user wanted to install a package, they were forced to either do it in their |  | ||||||
| own directory, or request a system administrator to install it in a shared |  | ||||||
| directory, so other users could also use that package. |  | ||||||
| 
 |  | ||||||
| This situation is gone, each user can install any package of software by |  | ||||||
| themselves, without requiring any other authorization. When two users request |  | ||||||
| the same package, the same copy will be provided. |  | ||||||
| 
 |  | ||||||
| A new package will be downloaded if it is available (someone already built it) |  | ||||||
| or will be built from source on demand. |  | ||||||
| 
 |  | ||||||
| ### No changes over time |  | ||||||
| 
 |  | ||||||
| All users retain the same versions of the packages they request until they |  | ||||||
| decide to update them. |  | ||||||
| 
 |  | ||||||
| ## Using nix to manage packages |  | ||||||
| 
 |  | ||||||
| In this chapter we show how to install packages and enter a development shell to |  | ||||||
| build new programs from source. The examples are done from the hut machine, |  | ||||||
| read [this page](/access) to request access. |  | ||||||
| 
 |  | ||||||
| ### Installing binaries |  | ||||||
| 
 |  | ||||||
| To temporarily install new packages, use: |  | ||||||
| 
 |  | ||||||
| ```text |  | ||||||
| hut% nix shell jungle#gcc jungle#cowsay jungle#ovni |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Notice that the packages are described as two parts divided by the `#` symbol. |  | ||||||
| The first part defines where to take the package from and the second part is |  | ||||||
| the name of the package. For now we will use `jungle#<package>`. You can find |  | ||||||
| many more packages here: |  | ||||||
| 
 |  | ||||||
| <https://search.nixos.org/packages> |  | ||||||
| 
 |  | ||||||
| You will now enter a new shell, where those requested package **binaries are |  | ||||||
| available in $PATH**: |  | ||||||
| 
 |  | ||||||
| ```text |  | ||||||
| hut% cowsay hello world |  | ||||||
|  _____________ |  | ||||||
| < hello world > |  | ||||||
|  ------------- |  | ||||||
|         \   ^__^ |  | ||||||
|          \  (oo)\_______ |  | ||||||
|             (__)\       )\/\ |  | ||||||
|                 ||----w | |  | ||||||
|                 ||     || |  | ||||||
| 
 |  | ||||||
| hut% ovniver |  | ||||||
| LD_LIBRARY_PATH not set |  | ||||||
| libovni: build v1.11.0 (a7103f8), dynamic v1.11.0 (a7103f8) |  | ||||||
| 
 |  | ||||||
| hut% gcc --version |  | ||||||
| gcc (GCC) 13.3.0 |  | ||||||
| Copyright (C) 2023 Free Software Foundation, Inc. |  | ||||||
| This is free software; see the source for copying conditions.  There is NO |  | ||||||
| warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| ### Building programs |  | ||||||
| 
 |  | ||||||
| The above method only loads new binaries in the `$PATH`. If we try to build a |  | ||||||
| program that includes headers or links with a library, it will fail to find |  | ||||||
| them: |  | ||||||
| 
 |  | ||||||
| ```text |  | ||||||
| hut$ cat test.c |  | ||||||
| #include <ovni.h> |  | ||||||
| 
 |  | ||||||
| int main() |  | ||||||
| { |  | ||||||
|         ovni_version_check(); |  | ||||||
|         return 0; |  | ||||||
| } |  | ||||||
| hut% gcc test.c -lovni -o test |  | ||||||
| test.c:1:10: fatal error: ovni.h: No such file or directory |  | ||||||
|     1 | #include <ovni.h> |  | ||||||
|       |          ^~~~~~~~ |  | ||||||
| compilation terminated. |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| We could manually add the full path to the ovni include directory with `-I` and |  | ||||||
| the libraries with `-L`, but there is a tool that already performs these steps |  | ||||||
| automatically for us, `nix develop`. |  | ||||||
| 
 |  | ||||||
| Let's go back to our original shell first, where those packages are not |  | ||||||
| available anymore: |  | ||||||
| 
 |  | ||||||
| ``` |  | ||||||
| hut% ps |  | ||||||
|     PID TTY          TIME CMD |  | ||||||
| 2356260 pts/1    00:00:01 zsh |  | ||||||
| 2457268 pts/1    00:00:00 zsh |  | ||||||
| 2457297 pts/1    00:00:00 ps |  | ||||||
| hut% exit |  | ||||||
| hut% ovniver |  | ||||||
| ovniver: command not found |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| ### Creating a flake.nix |  | ||||||
| 
 |  | ||||||
| To define which packages we want, we will write a small file that lists them, a |  | ||||||
| flake.nix file. |  | ||||||
| 
 |  | ||||||
| First, we will create a new directory where we are going to be working: |  | ||||||
| 
 |  | ||||||
| ``` |  | ||||||
| hut% mkdir example |  | ||||||
| hut% cd example |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Then place this flake.nix file: |  | ||||||
| 
 |  | ||||||
| ```nix |  | ||||||
| { |  | ||||||
|   inputs.jungle.url = "jungle"; |  | ||||||
|   outputs = { self, jungle }:   |  | ||||||
|   let |  | ||||||
|     pkgs = jungle.outputs.packages.x86_64-linux; |  | ||||||
|   in { |  | ||||||
|     devShells.x86_64-linux.default = pkgs.mkShell { |  | ||||||
|       pname = "devshell"; |  | ||||||
|       buildInputs = with pkgs; [ |  | ||||||
|         ovni gcc cowsay # more packages here... |  | ||||||
|       ]; |  | ||||||
|     }; |  | ||||||
|   }; |  | ||||||
| } |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| Now enter the shell with: |  | ||||||
| 
 |  | ||||||
| ``` |  | ||||||
| hut% nix develop |  | ||||||
| warning: creating lock file '/home/Computational/rarias/example/flake.lock': |  | ||||||
| • Added input 'jungle': |  | ||||||
|     'path:/nix/store/27srv8haj6vv4ywrbmw0a8vds561m8rq-source?lastModified=1739479441&narHash=sha256-Kgjs8SO1w9NbPBu8ghwzCxYJ9kvWpoQOT%2BXwPvA9DcU%3D&rev=76396c0d67ef0cf32377d5c1894bb695293bca9d' (2025-02-13) |  | ||||||
| • Added input 'jungle/agenix': |  | ||||||
|     'github:ryantm/agenix/f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41?narHash=sha256-b%2Buqzj%2BWa6xgMS9aNbX4I%2BsXeb5biPDi39VgvSFqFvU%3D' (2024-08-10) |  | ||||||
| • Added input 'jungle/agenix/darwin': |  | ||||||
|     'github:lnl7/nix-darwin/4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d?narHash=sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0%3D' (2023-11-24) |  | ||||||
| • Added input 'jungle/agenix/darwin/nixpkgs': |  | ||||||
|     follows 'jungle/agenix/nixpkgs' |  | ||||||
| • Added input 'jungle/agenix/home-manager': |  | ||||||
|     'github:nix-community/home-manager/3bfaacf46133c037bb356193bd2f1765d9dc82c1?narHash=sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE%3D' (2023-12-20) |  | ||||||
| • Added input 'jungle/agenix/home-manager/nixpkgs': |  | ||||||
|     follows 'jungle/agenix/nixpkgs' |  | ||||||
| • Added input 'jungle/agenix/nixpkgs': |  | ||||||
|     follows 'jungle/nixpkgs' |  | ||||||
| • Added input 'jungle/agenix/systems': |  | ||||||
|     'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e?narHash=sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768%3D' (2023-04-09) |  | ||||||
| • Added input 'jungle/bscpkgs': |  | ||||||
|     'git+https://git.sr.ht/~rodarima/bscpkgs?ref=refs/heads/master&rev=6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f' (2024-11-29) |  | ||||||
| • Added input 'jungle/bscpkgs/nixpkgs': |  | ||||||
|     follows 'jungle/nixpkgs' |  | ||||||
| • Added input 'jungle/nixpkgs': |  | ||||||
|     'github:NixOS/nixpkgs/9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc?narHash=sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8%3D' (2025-01-14) |  | ||||||
| 
 |  | ||||||
| hut$  |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Notice that the long list of messages is Nix creating a new flake.lock file with the |  | ||||||
| current state of the packages. Next invocations will use the same packages as |  | ||||||
| described by the lock file. |  | ||||||
| 
 |  | ||||||
| ### Building a program from nix develop |  | ||||||
| 
 |  | ||||||
| Now let's try again building our test program: |  | ||||||
| 
 |  | ||||||
| ```text |  | ||||||
| hut$ cat test.c |  | ||||||
| #include <ovni.h> |  | ||||||
| 
 |  | ||||||
| int main() |  | ||||||
| { |  | ||||||
|         ovni_version_check(); |  | ||||||
|         return 0; |  | ||||||
| } |  | ||||||
| hut$ gcc test.c -o test -lovni |  | ||||||
| hut$ ldd test |  | ||||||
|         linux-vdso.so.1 (0x00007ffff7fc4000) |  | ||||||
|         libovni.so.1 => /nix/store/sqk972akjv0q8dchn8ccjln2llzyyfd0-ovni-1.11.0/lib/libovni.so.1 (0x00007ffff7fab000) |  | ||||||
|         libc.so.6 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/libc.so.6 (0x00007ffff7db2000) |  | ||||||
|         /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib/ld-linux-x86-64.so.2 => /nix/store/nqb2ns2d1lahnd5ncwmn6k84qfd7vx2k-glibc-2.40-36/lib64/ld-linux-x86-64.so.2 (0x00007ffff7fc6000) |  | ||||||
| hut$ ./test |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Now the ovni.h header and the libovni library are found and the program is |  | ||||||
| successfully built, linked and executed. |  | ||||||
| 
 |  | ||||||
| You can add more packages as needed in your flake.nix: |  | ||||||
| 
 |  | ||||||
| ```nix |  | ||||||
|   buildInputs = with pkgs; [ |  | ||||||
|     ovni gcc cowsay # more packages here... |  | ||||||
|   ]; |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Make sure you exit the develop shell first, and then enter again with `nix |  | ||||||
| develop`. |  | ||||||
| 
 |  | ||||||
| ## Remember |  | ||||||
| 
 |  | ||||||
| - `nix shell` places binaries in the `$PATH`. |  | ||||||
| - `nix develop` enters a development shell where both binaries and the libraries |  | ||||||
|   and includes are available so you can build new programs. |  | ||||||
| @ -1,97 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Fox" |  | ||||||
| description: "AMD Genoa 9684X with 2 NVIDIA RTX4000 GPUs" |  | ||||||
| date: 2025-02-12 |  | ||||||
| --- |  | ||||||
| 
 |  | ||||||
|  |  | ||||||
| 
 |  | ||||||
| Picture by [Joanne Redwood](https://web.archive.org/web/20191109175146/https://www.inaturalist.org/photos/6568074), |  | ||||||
| [CC0](http://creativecommons.org/publicdomain/zero/1.0/deed.en). |  | ||||||
| 
 |  | ||||||
| The *fox* machine is a big GPU server that is configured to run heavy workloads. |  | ||||||
| It has two fast AMD CPUs with large cache and 2 reasonable NVIDIA GPUs. Here are |  | ||||||
| the detailed specifications: |  | ||||||
| 
 |  | ||||||
| - 2x AMD GENOA X 9684X DP/UP 96C/192T 2.55G 1,150M 400W SP5 3D V-Cache |  | ||||||
| - 24x 32GB DDR5-4800 ECC RDIMM (total 768 GiB of RAM) |  | ||||||
| - 1x 2.5" SSD SATA3 MICRON 5400 MAX 480GB |  | ||||||
| - 2x 2.5" KIOXIA CM7-R 1.92TB NVMe GEN5 PCIe 5x4 |  | ||||||
| - 2x NVIDIA RTX4000 ADA Gen 20GB GDDR6 PCIe 4.0 |  | ||||||
| 
 |  | ||||||
| ## Access |  | ||||||
| 
 |  | ||||||
| To access the machine, request a SLURM session from [hut](/hut) using the `fox` |  | ||||||
| partition: |  | ||||||
| 
 |  | ||||||
|     hut% salloc -p fox |  | ||||||
| 
 |  | ||||||
| Then connect via ssh: |  | ||||||
| 
 |  | ||||||
|     hut% ssh fox |  | ||||||
|     fox% |  | ||||||
| 
 |  | ||||||
| Follow [these steps](/access) if you don't have access to hut or fox. |  | ||||||
| 
 |  | ||||||
| ## CUDA |  | ||||||
| 
 |  | ||||||
| To use CUDA, you can use the following `flake.nix` placed in a new directory to |  | ||||||
| load all the required dependencies: |  | ||||||
| 
 |  | ||||||
| ```nix |  | ||||||
| { |  | ||||||
|   inputs.jungle.url = "jungle"; |  | ||||||
| 
 |  | ||||||
|   outputs = { jungle, ... }: { |  | ||||||
|     devShell.x86_64-linux = let |  | ||||||
|       pkgs = jungle.nixosConfigurations.fox.pkgs; |  | ||||||
|     in pkgs.mkShell { |  | ||||||
|       name = "cuda-env-shell"; |  | ||||||
|       buildInputs = with pkgs; [ |  | ||||||
|         git gitRepo gnupg autoconf curl |  | ||||||
|         procps gnumake util-linux m4 gperf unzip |  | ||||||
| 
 |  | ||||||
|         # Cuda packages (more at https://search.nixos.org/packages) |  | ||||||
|         cudatoolkit linuxPackages.nvidia_x11 |  | ||||||
|         cudaPackages.cuda_cudart.static |  | ||||||
|         cudaPackages.libcusparse |  | ||||||
| 
 |  | ||||||
|         libGLU libGL |  | ||||||
|         xorg.libXi xorg.libXmu freeglut |  | ||||||
|         xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib |  | ||||||
|         ncurses5 stdenv.cc binutils |  | ||||||
|       ]; |  | ||||||
|       shellHook = '' |  | ||||||
|         export CUDA_PATH=${pkgs.cudatoolkit} |  | ||||||
|         export LD_LIBRARY_PATH=/var/run/opengl-driver/lib |  | ||||||
|         export SMS=50 |  | ||||||
|       ''; |  | ||||||
|     }; |  | ||||||
|   }; |  | ||||||
| } |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Then just run `nix develop` from the same directory: |  | ||||||
| 
 |  | ||||||
|     % mkdir cuda |  | ||||||
|     % cd cuda |  | ||||||
|     % vim flake.nix |  | ||||||
|     [...] |  | ||||||
|     % nix develop |  | ||||||
|     $ nvcc -V |  | ||||||
|     nvcc: NVIDIA (R) Cuda compiler driver |  | ||||||
|     Copyright (c) 2005-2024 NVIDIA Corporation |  | ||||||
|     Built on Tue_Feb_27_16:19:38_PST_2024 |  | ||||||
|     Cuda compilation tools, release 12.4, V12.4.99 |  | ||||||
|     Build cuda_12.4.r12.4/compiler.33961263_0 |  | ||||||
| 
 |  | ||||||
| ## Filesystems |  | ||||||
| 
 |  | ||||||
| The machine has several file systems available. |  | ||||||
| 
 |  | ||||||
| - `$HOME`: Mounted via NFS across all nodes. It is slow and has low capacity. |  | ||||||
|   Don't abuse. |  | ||||||
| - `/ceph/home/$USER`: Shared Ceph file system across jungle nodes. Slow but high |  | ||||||
|   capacity. Stores three redundant copies of every file. |  | ||||||
| - `/nvme{0,1}/$USER`: The two local NVME disks, very fast and large capacity. |  | ||||||
| - `/tmp`: tmpfs, fast but not backed by a disk. Will be erased on reboot. |  | ||||||
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 126 KiB | 
| @ -3,38 +3,32 @@ languageCode = 'en-us' | |||||||
| title = 'The jungle' | title = 'The jungle' | ||||||
| theme = 'PaperMod' | theme = 'PaperMod' | ||||||
| 
 | 
 | ||||||
| [[menu.main]] |  | ||||||
| identifier = "doc" |  | ||||||
| name = "Docs" |  | ||||||
| url = "/doc/" |  | ||||||
| weight = 10 |  | ||||||
| 
 |  | ||||||
| [[menu.main]] | [[menu.main]] | ||||||
| identifier = "grafana" | identifier = "grafana" | ||||||
| name = "Grafana" | name = "Grafana" | ||||||
| url = "/grafana/" | url = "/grafana/" | ||||||
| weight = 20 | weight = 10 | ||||||
| 
 | 
 | ||||||
| [[menu.main]] | [[menu.main]] | ||||||
| identifier = "Git" | identifier = "Git" | ||||||
| name = "Git" | name = "Git" | ||||||
| url = "/git/" | url = "/git/" | ||||||
| weight = 30 | weight = 20 | ||||||
| 
 | 
 | ||||||
| [[menu.main]] | [[menu.main]] | ||||||
| identifier = "Lists" | identifier = "Lists" | ||||||
| name = "Lists" | name = "Lists" | ||||||
| url = "/lists/" | url = "/lists/" | ||||||
| weight = 40 | weight = 30 | ||||||
| 
 | 
 | ||||||
| [[menu.main]] | [[menu.main]] | ||||||
| identifier = "Paste" | identifier = "Paste" | ||||||
| name = "Paste" | name = "Paste" | ||||||
| url = "/paste/" | url = "/paste/" | ||||||
| weight = 50 | weight = 40 | ||||||
| 
 | 
 | ||||||
| [[menu.main]] | [[menu.main]] | ||||||
| identifier = "Posts" | identifier = "Posts" | ||||||
| name = "Posts" | name = "Posts" | ||||||
| url = "/posts/" | url = "/posts/" | ||||||
| weight = 60 | weight = 50 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user