Reviewed-by: Aleix Boné <abonerib@bsc.es> Reviewed-by: Aleix Roca Nonell <aleix.rocanonell@bsc.es>
		
			
				
	
	
		
			273 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
			
		
		
	
	
			273 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
| { config, lib, ... }:
 | |
| 
 | |
| {
 | |
|   imports = [
 | |
|     ../module/slurm-exporter.nix
 | |
|     ../module/meteocat-exporter.nix
 | |
|     ../module/upc-qaire-exporter.nix
 | |
|     ./gpfs-probe.nix
 | |
|     ../module/nix-daemon-exporter.nix
 | |
|   ];
 | |
| 
 | |
|   age.secrets.grafanaJungleRobotPassword = {
 | |
|     file = ../../secrets/jungle-robot-password.age;
 | |
|     owner = "grafana";
 | |
|     mode = "400";
 | |
|   };
 | |
| 
 | |
|   age.secrets.ipmiYml.file = ../../secrets/ipmi.yml.age;
 | |
| 
 | |
|   services.grafana = {
 | |
|     enable = true;
 | |
|     settings = {
 | |
|       server = {
 | |
|         domain = "jungle.bsc.es";
 | |
|         root_url = "%(protocol)s://%(domain)s/grafana";
 | |
|         serve_from_sub_path = true;
 | |
|         http_port = 2342;
 | |
|         http_addr = "127.0.0.1";
 | |
|       };
 | |
|       smtp = {
 | |
|         enabled = true;
 | |
|         from_address = "jungle-robot@bsc.es";
 | |
|         user = "jungle-robot";
 | |
|         # Read the password from a file, which is only readable by grafana user
 | |
|         # https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
 | |
|         password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
 | |
|         host = "mail.bsc.es:465";
 | |
|         startTLS_policy = "NoStartTLS";
 | |
|       };
 | |
|       feature_toggles.publicDashboards = true;
 | |
|       "auth.anonymous".enabled = true;
 | |
|       log.level = "warn";
 | |
|     };
 | |
|   };
 | |
| 
 | |
|   # Make grafana alerts also use the proxy
 | |
|   systemd.services.grafana.environment = config.networking.proxy.envVars;
 | |
| 
 | |
|   services.prometheus = {
 | |
|     enable = true;
 | |
|     port = 9001;
 | |
|     retentionTime = "5y";
 | |
|     listenAddress = "127.0.0.1";
 | |
|   };
 | |
| 
 | |
|   systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false;
 | |
|   systemd.services.prometheus-ipmi-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
 | |
| 
 | |
|   # We need access to the devices to monitor the disk space
 | |
|   systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
 | |
|   systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
 | |
| 
 | |
|   virtualisation.docker.daemon.settings = {
 | |
|     metrics-addr = "127.0.0.1:9323";
 | |
|   };
 | |
| 
 | |
|   # Required to allow the smartctl exporter to read the nvme0 character device,
 | |
|   # see the commit message on:
 | |
|   # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80
 | |
|   services.udev.extraRules = ''
 | |
|     SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk"
 | |
|   '';
 | |
| 
 | |
|   services.prometheus = {
 | |
| 
 | |
|     exporters = {
 | |
|       ipmi = {
 | |
|         enable = true;
 | |
|         group = "root";
 | |
|         user = "root";
 | |
|         configFile = config.age.secrets.ipmiYml.path;
 | |
|         # extraFlags = [ "--log.level=debug" ];
 | |
|         listenAddress = "127.0.0.1";
 | |
|       };
 | |
|       node = {
 | |
|         enable = true;
 | |
|         enabledCollectors = [ "systemd" "logind" ];
 | |
|         port = 9002;
 | |
|         listenAddress = "127.0.0.1";
 | |
|       };
 | |
|       smartctl = {
 | |
|         enable = true;
 | |
|         listenAddress = "127.0.0.1";
 | |
|       };
 | |
|       blackbox = {
 | |
|         enable = true;
 | |
|         listenAddress = "127.0.0.1";
 | |
|         configFile = ./blackbox.yml;
 | |
|       };
 | |
|     };
 | |
| 
 | |
|     scrapeConfigs = [
 | |
|       {
 | |
|         job_name = "xeon07";
 | |
|         static_configs = [{
 | |
|           targets = [
 | |
|             "127.0.0.1:${toString config.services.prometheus.exporters.node.port}"
 | |
|             "127.0.0.1:${toString config.services.prometheus.exporters.ipmi.port}"
 | |
|             "127.0.0.1:9323"
 | |
|             "127.0.0.1:9252"
 | |
|             "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
 | |
|             "127.0.0.1:9341" # Slurm exporter
 | |
|             "127.0.0.1:9966" # GPFS custom exporter
 | |
|             "127.0.0.1:9999" # Nix-daemon custom exporter
 | |
|             "127.0.0.1:9929" # Meteocat custom exporter
 | |
|             "127.0.0.1:9928" # UPC Qaire custom exporter
 | |
|             "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
 | |
|           ];
 | |
|         }];
 | |
|       }
 | |
|       {
 | |
|         job_name = "ceph";
 | |
|         static_configs = [{
 | |
|           targets = [
 | |
|             "10.0.40.40:9283" # Ceph statistics
 | |
|             "10.0.40.40:9002" # Node exporter
 | |
|             "10.0.40.42:9002" # Node exporter
 | |
|           ];
 | |
|         }];
 | |
|       }
 | |
|       {
 | |
|         job_name = "blackbox-http";
 | |
|         metrics_path = "/probe";
 | |
|         params = { module = [ "http_2xx" ]; };
 | |
|         static_configs = [{
 | |
|           targets = [
 | |
|             "https://www.google.com/robots.txt"
 | |
|             "https://pm.bsc.es/"
 | |
|             "https://pm.bsc.es/gitlab/"
 | |
|             "https://jungle.bsc.es/"
 | |
|             "https://gitlab.bsc.es/"
 | |
|           ];
 | |
|         }];
 | |
|         relabel_configs = [
 | |
|           {
 | |
|             # Takes the address and sets it in the "target=<xyz>" URL parameter
 | |
|             source_labels = [ "__address__" ];
 | |
|             target_label = "__param_target";
 | |
|           }
 | |
|           {
 | |
|             # Sets the "instance" label with the remote host we are querying
 | |
|             source_labels = [ "__param_target" ];
 | |
|             target_label = "instance";
 | |
|           }
 | |
|           {
 | |
|             # Shows the host target address instead of the blackbox address
 | |
|             target_label = "__address__";
 | |
|             replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
 | |
|           }
 | |
|         ];
 | |
|       }
 | |
|       {
 | |
|         job_name = "blackbox-icmp";
 | |
|         metrics_path = "/probe";
 | |
|         params = { module = [ "icmp" ]; };
 | |
|         static_configs = [{
 | |
|           targets = [
 | |
|             "1.1.1.1"
 | |
|             "8.8.8.8"
 | |
|             "ssfhead"
 | |
|             "anella-bsc.cesca.cat"
 | |
|             "upc-anella.cesca.cat"
 | |
|             "fox.ac.upc.edu"
 | |
|             "arenys5.ac.upc.edu"
 | |
|           ];
 | |
|         }];
 | |
|         relabel_configs = [
 | |
|           {
 | |
|             # Takes the address and sets it in the "target=<xyz>" URL parameter
 | |
|             source_labels = [ "__address__" ];
 | |
|             target_label = "__param_target";
 | |
|           }
 | |
|           {
 | |
|             # Sets the "instance" label with the remote host we are querying
 | |
|             source_labels = [ "__param_target" ];
 | |
|             target_label = "instance";
 | |
|           }
 | |
|           {
 | |
|             # Shows the host target address instead of the blackbox address
 | |
|             target_label = "__address__";
 | |
|             replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
 | |
|           }
 | |
|         ];
 | |
|       }
 | |
|       {
 | |
|         job_name = "gitea";
 | |
|         static_configs = [{ targets = [ "127.0.0.1:3000" ]; }];
 | |
|       }
 | |
|       {
 | |
|         # Scrape the IPMI info of the hosts remotely via LAN
 | |
|         job_name = "ipmi-lan";
 | |
|         scrape_interval = "1m";
 | |
|         scrape_timeout = "30s";
 | |
|         metrics_path = "/ipmi";
 | |
|         scheme = "http";
 | |
|         relabel_configs = [
 | |
|           {
 | |
|             # Takes the address and sets it in the "target=<xyz>" URL parameter
 | |
|             source_labels = [ "__address__" ];
 | |
|             separator = ";";
 | |
|             regex = "(.*)(:80)?";
 | |
|             target_label = "__param_target";
 | |
|             replacement = "\${1}";
 | |
|             action = "replace";
 | |
|           }
 | |
|           {
 | |
|             # Sets the "instance" label with the remote host we are querying
 | |
|             source_labels = [ "__param_target" ];
 | |
|             separator = ";";
 | |
|             regex = "(.*)-ipmi"; # Remove "-ipm̀i" at the end
 | |
|             target_label = "instance";
 | |
|             replacement = "\${1}";
 | |
|             action = "replace";
 | |
|           }
 | |
|           {
 | |
|             # Sets the fixed "module=lan" URL param
 | |
|             separator = ";";
 | |
|             regex = "(.*)";
 | |
|             target_label = "__param_module";
 | |
|             replacement = "lan";
 | |
|             action = "replace";
 | |
|           }
 | |
|           {
 | |
|             # Sets the target to query as the localhost IPMI exporter
 | |
|             separator = ";";
 | |
|             regex = ".*";
 | |
|             target_label = "__address__";
 | |
|             replacement = "127.0.0.1:9290";
 | |
|             action = "replace";
 | |
|           }
 | |
|         ];
 | |
| 
 | |
|         # Load the list of targets from another file
 | |
|         file_sd_configs = [
 | |
|           {
 | |
|             files = [ "${./targets.yml}" ];
 | |
|             refresh_interval = "30s";
 | |
|           }
 | |
|         ];
 | |
|       }
 | |
|       {
 | |
|         job_name = "ipmi-raccoon";
 | |
|         metrics_path = "/ipmi";
 | |
|         static_configs = [
 | |
|           { targets = [ "127.0.0.1:9291" ]; }
 | |
|         ];
 | |
|         params = {
 | |
|           target = [ "84.88.51.142" ];
 | |
|           module = [ "raccoon" ];
 | |
|         };
 | |
|       }
 | |
|       {
 | |
|         job_name = "raccoon";
 | |
|         static_configs = [
 | |
|           {
 | |
|             targets = [ "127.0.0.1:19002" ]; # Node exporter
 | |
|           }
 | |
|         ];
 | |
|       }
 | |
|     ];
 | |
|   };
 | |
| }
 |