monitoring: refactoring + blackbox

This commit is contained in:
jeltz 2025-06-21 11:49:25 +02:00
parent 59789595d1
commit 698bde5856
Signed by: jeltz
GPG key ID: 800882B66C0C3326
5 changed files with 129 additions and 51 deletions

View file

@ -0,0 +1,22 @@
# Alert bot configuration: imports the alertbot module, declares the age
# secret that holds the bot's Matrix password, and enables the service.
{ pkgs, lib, config, network, ... }:
let
  # Path of the `alertbot-matrix-password` age secret declared below.
  matrixPasswordPath = config.age.secrets.alertbot-matrix-password.path;
in
{
  imports = [ ../../modules/alertbot.nix ];

  age.secrets.alertbot-matrix-password.file =
    ../../secrets/alertbot-matrix-password.age;

  services.alertbot.enable = true;
  services.alertbot.listenPort = 8081;

  # Matrix account and room used by the bot.
  services.alertbot.matrix = {
    homeserver = "https://matrix.federez.net";
    user = "@alertbot:federez.net";
    roomId = "!bVyCrycmkkLXdQRquJ:federez.net";
    passwordFile = matrixPasswordPath;
  };
}

View file

@ -0,0 +1,35 @@
# Blackbox exporter configuration: generates the exporter's YAML config
# (probe modules) and enables the exporter on localhost.
{ pkgs, lib, config, network, ... }:
let
  # Probe modules, serialised to YAML with the nixpkgs YAML format helper.
  blackboxConfig = (pkgs.formats.yaml { }).generate "blackbox-config.yml" {
    modules = {
      # HTTP GET probe: requires TLS and a direct 200 answer (redirects
      # are not followed).
      https_get_200 = {
        prober = "http";
        http = {
          valid_status_codes = [ 200 ];
          method = "GET";
          follow_redirects = false;
          fail_if_not_ssl = true;
        };
      };
      # DNS probe: queries the PTR record of 193.54.193.162 and validates
      # the answer section against the regexp below.
      dns_dodecagon_ptr = {
        prober = "dns";
        dns = {
          query_type = "PTR";
          query_name = "162.193.54.193.in-addr.arpa.";
          validate_answer_rrs = {
            fail_if_not_matches_regexp = [
              # Backslashes are doubled on purpose: in a Nix double-quoted
              # string "\." evaluates to a bare "." (which a regex treats
              # as "any character") and "\t" to a literal tab. Doubling
              # them passes `\.` and `\t` through to the regex engine
              # unchanged, so dots are matched literally.
              "162\\.193\\.54\\.193\\.in-addr\\.arpa\\.\\t.*\\tIN\\tPTR\\tdodecagon\\.federez\\.net\\."
            ];
          };
        };
      };
    };
  };
in {
  services.prometheus.exporters.blackbox = {
    enable = true;
    listenAddress = "localhost";
    configFile = blackboxConfig;
  };
}

View file

@ -1,82 +1,95 @@
{ lib, config, network, ... }: { pkgs, lib, config, network, ... }:
let let
cfg = config.services.victoriametrics; cfg = config.services.victoriametrics;
mkScrapeConfig = name: path: port: targets: { victoriametricsPort = 8428;
alertmanagerPort = config.services.prometheus.alertmanager.port;
alertbotPort = config.services.alertbot.listenPort;
blackboxPort = config.services.prometheus.exporters.blackbox.port;
nodePort = 9100;
mkScrapeConfig = name: config: {
job_name = name; job_name = name;
metrics_path = path; metrics_path = config.path;
static_configs = [ { targets = targets; } ]; static_configs = [ { targets = config.targets; } ];
params = config.params or { };
relabel_configs = [ relabel_configs = [
{ source_labels = [ "__address__"]; target_label = "__param_target"; } { source_labels = [ "__address__"]; target_label = "__param_target"; }
{ source_labels = [ "__param_target"]; target_label = "instance"; } { source_labels = [ "__param_target"]; target_label = "instance"; }
{ {
source_labels = [ "__param_target"]; source_labels = [ "__param_target"];
target_label = "__address__"; target_label = "__address__";
replacement = "$1.infra.federez.net:${toString port}"; replacement = config.replacement;
} }
]; ];
}; };
nodePort = 9100; mkScrapeConfigs = lib.attrsets.mapAttrsToList mkScrapeConfig;
vmPort = 8428;
nodesConfig = mkScrapeConfig "node" "/metrics" nodePort
(lib.attrsets.mapAttrsToList (n: _: n) network.infra.nodes);
critical = { severity = "critical"; }; critical = { severity = "critical"; };
warning = { severity = "warning"; }; warning = { severity = "warning"; };
importRules = path: let mkRuleGroups = lib.attrsets.mapAttrsToList (name: path: {
attrs = import path { inherit critical warning; }; inherit name;
in lib.attrsets.mapAttrsToList (n: a: a // { alert = n; }) attrs; rules = lib.attrsets.mapAttrsToList
(alert: attrs: attrs // { inherit alert; })
(import path { inherit critical warning; });
});
in { in {
imports = [ imports = [
../../modules/alertbot.nix ../../modules/alertbot.nix
./blackbox.nix
./alertbot.nix
]; ];
age.secrets.alertbot-matrix-password = {
file = ../../secrets/alertbot-matrix-password.age;
};
backups.directories = [ "/var/lib/${cfg.stateDir}" ]; backups.directories = [ "/var/lib/${cfg.stateDir}" ];
services.alertbot = {
enable = true;
listenPort = 8081;
matrix = {
homeserver = "https://matrix.federez.net";
user = "@alertbot:federez.net";
passwordFile = config.age.secrets.alertbot-matrix-password.path;
roomId = "!bVyCrycmkkLXdQRquJ:federez.net";
};
};
services.victoriametrics = { services.victoriametrics = {
enable = true; enable = true;
extraOptions = [ "-enableTCP6" ]; extraOptions = [ "-enableTCP6" ];
listenAddress = "localhost:${toString vmPort}"; listenAddress = "localhost:${toString victoriametricsPort}";
prometheusConfig = { prometheusConfig = {
scrape_configs = [ nodesConfig ]; scrape_configs = mkScrapeConfigs {
node = {
path = "/metrics";
replacement = "$1.infra.federez.net:${toString nodePort}";
targets = lib.attrsets.mapAttrsToList (n: _: n) network.infra.nodes;
};
blackbox_https_get_200 = {
path = "/probe";
replacement = "localhost:${toString blackboxPort}";
params.module = [ "https_get_200" ];
targets = [
"https://federez.net/"
"https://re2o.federez.net/"
"https://gitlab2.federez.net/federez/nix"
"https://www.federez.net/"
"https://events.federez.net/"
"https://wiki.federez.net/"
"https://wiki-backup.federez.net/"
"https://lists.federez.net/postorius/lists/"
"https://element.federez.net/"
"https://chat.federez.net/login"
"https://nextcloud.federez.net/index.php/login"
"https://watch.federez.net/"
];
};
};
}; };
}; };
services.vmalert = { services.vmalert = {
enable = true; enable = true;
rules = { rules = {
groups = [ groups = mkRuleGroups {
{ common = ./rules/common.nix;
name = "common"; node = ./rules/node.nix;
rules = importRules ./rules/common.nix; blackbox = ./rules/blackbox.nix;
} };
{
name = "node";
rules = importRules ./rules/node.nix;
}
];
}; };
settings = let settings = let
vmUrl = "http://localhost:${toString vmPort}"; victoriametricsUrl = "http://localhost:${toString victoriametricsPort}";
amUrl = "http://localhost:${toString config.services.prometheus.alertmanager.port}"; alertmanagerUrl = "http://localhost:${toString alertmanagerPort}";
in { in {
"datasource.url" = vmUrl; "datasource.url" = victoriametricsUrl;
"remoteWrite.url" = vmUrl; "remoteWrite.url" = victoriametricsUrl;
"remoteRead.url" = vmUrl; "remoteRead.url" = victoriametricsUrl;
"notifier.url" = [ amUrl ]; "notifier.url" = [ alertmanagerUrl ];
}; };
}; };
@ -100,11 +113,9 @@ in {
receivers = [ receivers = [
{ {
name = "webhook"; name = "webhook";
webhook_configs = let webhook_configs = [
port = config.services.alertbot.listenPort;
in [
{ {
url = "http://localhost:${toString port}/webhook"; url = "http://localhost:${toString alertbotPort}/webhook";
send_resolved = true; send_resolved = true;
} }
]; ];

View file

@ -0,0 +1,10 @@
# Alerting rules for the blackbox HTTPS probes.
# Called with the shared label sets; only `critical` is used here.
{ critical, ... }:
{
  # Raised when `probe_success` for the `blackbox_https_get_200` job has
  # been different from 1 for three minutes.
  BlackboxHttps200Failure = {
    labels = critical;
    for = "3m";
    expr = ''
      probe_success{job="blackbox_https_get_200"} != 1
    '';
  };
}

View file

@ -21,7 +21,7 @@
NodeSwapIsFillingUp = { NodeSwapIsFillingUp = {
expr = '' expr = ''
(1 - (node_memory_SwapFree_bytes (1 - (node_memory_SwapFree_bytes
/ node_memory_SwapTotal_bytes)) > 0.5 / node_memory_SwapTotal_bytes)) > 0.75
''; '';
for = "1m"; for = "1m";
labels = critical; labels = critical;
@ -265,7 +265,7 @@
NodeLoad5Usage = { NodeLoad5Usage = {
expr = '' expr = ''
node_load5 / ( node_load5 / (
count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.0 count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.1
''; '';
for = "1m"; for = "1m";
labels = warning; labels = warning;