Add script to monitor GPFS
Reviewed-by: Aleix Boné <abonerib@bsc.es>
This commit is contained in:
		
							parent
							
								
									d335d69ba6
								
							
						
					
					
						commit
						8190523c30
					
				
							
								
								
									
										31
									
								
								m/hut/gpfs-probe.nix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								m/hut/gpfs-probe.nix
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | |||||||
# NixOS module: exposes a GPFS latency probe over TCP. socat listens on
# port 9966 and runs the probe script for every incoming connection; the
# script reaches the cluster via SSH as the dedicated ssh-robot account.
{ pkgs, config, lib, ... }:

let
  # Copy of ./gpfs-probe.sh placed in the store with the executable bit set.
  probeScript = pkgs.runCommand "gpfs-probe.sh" { } ''
    cp ${./gpfs-probe.sh} $out;
    chmod +x $out
  '';
in
{
  # Dedicated account whose home directory holds the SSH keys.
  # NOTE(review): the user does not set `group`, so the ssh-robot group
  # declared here is only referenced by the service below -- confirm whether
  # it should also be the user's primary group.
  users = {
    groups.ssh-robot = { };
    users.ssh-robot = {
      isNormalUser = true;
      description = "SSH Robot";
      home = "/var/lib/ssh-robot";
    };
  };

  systemd.services.gpfs-probe = {
    description = "Daemon to report GPFS latency via SSH";
    wantedBy = [ "default.target" ];
    after = [ "network.target" ];

    # Tools made available on the PATH of the service.
    path = with pkgs; [ openssh netcat ];

    serviceConfig = {
      Type = "simple";
      User = "ssh-robot";
      Group = "ssh-robot";
      # One forked child per accepted IPv4 connection executes the probe.
      ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9966,fork EXEC:${probeScript}";
    };
  };
}
							
								
								
									
										18
									
								
								m/hut/gpfs-probe.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								m/hut/gpfs-probe.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,18 @@ | |||||||
#!/bin/sh
#
# Probe GPFS latency over SSH and print the result on stdout as an HTTP
# response carrying one gauge in the Prometheus text exposition format.
#
# The probe logs into the login node, measures with `time -f %e` how long it
# takes to touch $N files on GPFS and removes them afterwards. Whenever no
# usable measurement comes back, $FALLBACK is reported instead.

# Number of files created per probe.
N=500

# Value reported when the probe yields no valid measurement; it matches the
# 5 second local timeout placed on the ssh command.
FALLBACK="5.00"

host="bsc015557@glogin2.bsc.es"
prefix="/gpfs/projects/bsc15/bsc015557/gpfs"

# $prefix and $N are expanded locally; the {1..N} brace range is passed
# through untouched and left for the remote shell to expand. Only the output
# of the timed touch is captured (2>&1 applies to that command alone).
t=$(timeout 5 ssh "$host" "timeout 3 command time -f %e touch $prefix.{1..$N} 2>&1; rm -f $prefix.{1..$N}")

# Accept only a single unsigned decimal number. Empty output, error text or
# multi-line output (e.g. messages from touch or time) would otherwise be
# emitted verbatim and produce an invalid metrics line.
case "$t" in
  '' | . | *.*.* | *[!0-9.]*) t="$FALLBACK" ;;
esac

cat <<EOF
HTTP/1.1 200 OK
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values

# HELP gpfs_touch_latency Time to create $N files.
# TYPE gpfs_touch_latency gauge
gpfs_touch_latency $t
EOF
| @ -1,7 +1,10 @@ | |||||||
| { config, lib, ... }: | { config, lib, ... }: | ||||||
| 
 | 
 | ||||||
| { | { | ||||||
|   imports = [ ../module/slurm-exporter.nix ]; |   imports = [ | ||||||
|  |     ../module/slurm-exporter.nix | ||||||
|  |     ./gpfs-probe.nix | ||||||
|  |   ]; | ||||||
| 
 | 
 | ||||||
|   age.secrets.grafanaJungleRobotPassword = { |   age.secrets.grafanaJungleRobotPassword = { | ||||||
|     file = ../../secrets/jungle-robot-password.age; |     file = ../../secrets/jungle-robot-password.age; | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user