# jungle/m/hut/monitoring.nix
# Monitoring stack for this host: Grafana served under the /grafana sub-path,
# a local Prometheus, the node/IPMI/smartctl exporters and the Docker daemon
# metrics endpoint.
{ config, lib, ... }:

let
  exporterCfg = config.services.prometheus.exporters;

  # Builds a loopback "host:port" scrape address from a port number.
  localTarget = port: "127.0.0.1:${toString port}";

  # Address where the Docker daemon publishes its metrics. Defined once so the
  # daemon setting and the Prometheus scrape target cannot drift apart.
  dockerMetricsAddr = localTarget 9323;
in
{
  services.grafana = {
    enable = true;
    settings = {
      server = {
        domain = "jungle.bsc.es";
        # Grafana lives under the /grafana sub-path of the domain.
        root_url = "%(protocol)s://%(domain)s/grafana";
        serve_from_sub_path = true;
        http_port = 2342;
        http_addr = "127.0.0.1";
      };
      feature_toggles.publicDashboards = true;
    };
  };

  # The IPMI exporter is configured below to run as root, so the sandboxing
  # defaults of the unit are overridden here.
  # NOTE(review): presumably needed to reach the local IPMI device — confirm.
  systemd.services.prometheus-ipmi-exporter.serviceConfig = {
    DynamicUser = lib.mkForce false;
    PrivateDevices = lib.mkForce false;
  };

  # We need access to the devices to monitor the disk space
  systemd.services.prometheus-node-exporter.serviceConfig = {
    PrivateDevices = lib.mkForce false;
    ProtectHome = lib.mkForce "read-only";
  };

  virtualisation.docker.daemon.settings = {
    metrics-addr = dockerMetricsAddr;
  };

  # Required to allow the smartctl exporter to read the nvme0 character device,
  # see the commit message on:
  # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80
  services.udev.extraRules = ''
    SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk"
  '';

  services.prometheus = {
    enable = true;
    port = 9001;
    retentionTime = "1y";

    exporters = {
      ipmi = {
        enable = true;
        group = "root";
        user = "root";
        configFile = ./ipmi.yml;
        #extraFlags = [ "--log.level=debug" ];
      };
      node = {
        enable = true;
        enabledCollectors = [ "systemd" ];
        port = 9002;
      };
      smartctl.enable = true;
    };

    scrapeConfigs = [
      {
        # Everything that is scraped directly on this machine.
        job_name = "xeon07";
        static_configs = [{
          targets = [
            (localTarget exporterCfg.node.port)
            (localTarget exporterCfg.ipmi.port)
            dockerMetricsAddr
            # NOTE(review): this endpoint is not configured in this file;
            # confirm which service listens on 9252.
            "127.0.0.1:9252"
            (localTarget exporterCfg.smartctl.port)
          ];
        }];
      }
      {
        # Scrape the IPMI info of the hosts remotely via LAN
        job_name = "ipmi-lan";
        scrape_interval = "1m";
        scrape_timeout = "30s";
        metrics_path = "/ipmi";
        scheme = "http";
        relabel_configs = [
          {
            # Takes the address and sets it in the "target=<xyz>" URL parameter.
            # The first group is lazy: relabel regexes are fully anchored, so a
            # greedy "(.*)" would swallow a trailing ":80" and the optional
            # group would never strip it.
            source_labels = [ "__address__" ];
            separator = ";";
            regex = "(.*?)(:80)?";
            target_label = "__param_target";
            replacement = "\${1}";
            action = "replace";
          }
          {
            # Sets the "instance" label with the remote host we are querying
            source_labels = [ "__param_target" ];
            separator = ";";
            regex = "(.*)";
            target_label = "instance";
            replacement = "\${1}";
            action = "replace";
          }
          {
            # Sets the fixed "module=lan" URL param
            separator = ";";
            regex = "(.*)";
            target_label = "__param_module";
            replacement = "lan";
            action = "replace";
          }
          {
            # Sets the target to query as the localhost IPMI exporter; the port
            # follows the exporter option instead of being hard-coded.
            separator = ";";
            regex = ".*";
            target_label = "__address__";
            replacement = localTarget exporterCfg.ipmi.port;
            action = "replace";
          }
        ];
        # Load the list of targets from another file
        file_sd_configs = [
          {
            files = [ "${./targets.yml}" ];
            refresh_interval = "30s";
          }
        ];
      }
    ];
  };
}