# Patch summary (commentary placed ahead of the first "diff --git" header).
#
# 1. m/hut/gpfs-probe.nix (new file, 31 lines per its hunk header)
#    - Builds `gpfs-probe-script` with pkgs.runCommand by copying
#      ./gpfs-probe.sh to $out and marking it executable.
#    - Declares the group `ssh-robot` and the user `ssh-robot`
#      (isNormalUser = true, home = /var/lib/ssh-robot); the in-file comment
#      says this user exists to hold the SSH keys.
#    - Declares systemd service `gpfs-probe` (Type=simple, after
#      network.target, wantedBy default.target) whose ExecStart runs socat with
#      `TCP4-LISTEN:9966,fork EXEC:<script>` as User/Group ssh-robot, with
#      pkgs.openssh and pkgs.netcat on the service path.
#
# 2. m/hut/gpfs-probe.sh (new file, mode 100755, 20 lines per its hunk header)
#    - Sets N=500 and runs one ssh command against bsc015557@glogin2.bsc.es
#      that wraps `time -f %e touch` of gpfs.{1..N} under
#      /gpfs/projects/bsc15/bsc015557 in `timeout 5`, then removes the files
#      with `rm -f`. The captured output is stored in `t`.
#    - If `t` is empty it is replaced by the string "NaN".
#
# 3. m/hut/monitoring.nix
#    - Turns the single-entry `imports` list into a multi-line list and adds
#      ./gpfs-probe.nix to it.
#
# NOTE(review): this patch text is collapsed onto a single line; the original
#   line breaks must be restored before it can be applied -- TODO confirm
#   against the original commit.
# NOTE(review): the gpfs-probe.sh hunk declares 20 added lines, but the visible
#   script text ends at `cat <&2`, which is far fewer. Content after `cat <`
#   (presumably a here-document that prints the measurement) appears to have
#   been lost in extraction -- verify against the original commit.
# NOTE(review): pkgs.netcat is on the service path but the visible part of the
#   script never calls it; presumably it is used by the missing part of the
#   script or by the ssh client configuration -- confirm.
diff --git a/m/hut/gpfs-probe.nix b/m/hut/gpfs-probe.nix new file mode 100644 index 00000000..46d2c276 --- /dev/null +++ b/m/hut/gpfs-probe.nix @@ -0,0 +1,31 @@ +{ pkgs, config, lib, ... }: +let + gpfs-probe-script = pkgs.runCommand "gpfs-probe.sh" { } + '' + cp ${./gpfs-probe.sh} $out; + chmod +x $out + '' + ; +in +{ + # Use a new user to handle the SSH keys + users.groups.ssh-robot = { }; + users.users.ssh-robot = { + description = "SSH Robot"; + isNormalUser = true; + home = "/var/lib/ssh-robot"; + }; + + systemd.services.gpfs-probe = { + description = "Daemon to report GPFS latency via SSH"; + path = [ pkgs.openssh pkgs.netcat ]; + after = [ "network.target" ]; + wantedBy = [ "default.target" ]; + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.socat}/bin/socat -d2 TCP4-LISTEN:9966,fork EXEC:${gpfs-probe-script}"; + User = "ssh-robot"; + Group = "ssh-robot"; + }; + }; +} diff --git a/m/hut/gpfs-probe.sh b/m/hut/gpfs-probe.sh new file mode 100755 index 00000000..5d0a4e28 --- /dev/null +++ b/m/hut/gpfs-probe.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +N=500 + +t=$(ssh bsc015557@glogin2.bsc.es "timeout 5 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") + +if [ -z "$t" ]; then + t="NaN" +fi + +cat <&2 diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index ec782cd8..f7c7421e 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -1,7 +1,10 @@ { config, lib, ... }: { - imports = [ ../module/slurm-exporter.nix ]; + imports = [ + ../module/slurm-exporter.nix + ./gpfs-probe.nix + ]; age.secrets.grafanaJungleRobotPassword = { file = ../../secrets/jungle-robot-password.age;