From ba8e6e1888f5687fc09074e9e10d599840aa1410 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 4 Jun 2025 10:07:12 +0200 Subject: [PATCH 1/3] Monitor GPFS home and projects partitions --- m/hut/gpfs-probe.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/m/hut/gpfs-probe.sh b/m/hut/gpfs-probe.sh index b8f7f82f..220af96a 100755 --- a/m/hut/gpfs-probe.sh +++ b/m/hut/gpfs-probe.sh @@ -2,10 +2,15 @@ N=500 -t=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") +t_proj=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") +t_home=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /home/bsc/bsc015557/.gpfs/{1..$N} 2>&1; rm -f /home/bsc/bsc015557/.gpfs/{1..$N}") -if [ -z "$t" ]; then - t="5.00" +if [ -z "$t_proj" ]; then + t_proj="5.00" +fi + +if [ -z "$t_home" ]; then + t_home="5.00" fi cat < Date: Wed, 4 Jun 2025 15:23:15 +0200 Subject: [PATCH 2/3] Monitor tent node --- m/hut/monitoring.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 0c1cb78a..2076cd77 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -267,6 +267,14 @@ } ]; } + { + job_name = "tent"; + static_configs = [ + { + targets = [ "127.0.0.1:29002" ]; # Node exporter + } + ]; + } ]; }; } -- 2.49.0 From e3b9c08748114453ba1240e7ca7003f9af1a19cb Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 10 Jun 2025 11:56:49 +0200 Subject: [PATCH 3/3] Monitor GPFS scratch partition --- m/hut/gpfs-probe.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/m/hut/gpfs-probe.sh b/m/hut/gpfs-probe.sh index 220af96a..ad80897e 100755 --- a/m/hut/gpfs-probe.sh +++ b/m/hut/gpfs-probe.sh @@ -3,12 +3,17 @@ N=500 t_proj=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N} 2>&1; rm -f /gpfs/projects/bsc15/bsc015557/gpfs.{1..$N}") +t_scratch=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /gpfs/scratch/bsc15/rodrigo/probe/gpfs.{1..$N} 2>&1; rm -f /gpfs/scratch/bsc15/rodrigo/probe/gpfs.{1..$N}") t_home=$(timeout 5 ssh bsc015557@glogin2.bsc.es "timeout 3 command time -f %e touch /home/bsc/bsc015557/.gpfs/{1..$N} 2>&1; rm -f /home/bsc/bsc015557/.gpfs/{1..$N}") if [ -z "$t_proj" ]; then t_proj="5.00" fi +if [ -z "$t_scratch" ]; then + t_scratch="5.00" +fi + if [ -z "$t_home" ]; then t_home="5.00" fi @@ -21,4 +26,5 @@ Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values # TYPE gpfs_touch_latency gauge gpfs_touch_latency{partition="projects"} $t_proj gpfs_touch_latency{partition="home"} $t_home +gpfs_touch_latency{partition="scratch"} $t_scratch EOF -- 2.49.0