nix/profiles/monitoring/default.nix

115 lines
3 KiB
Nix

{ lib, config, network, ... }:
let
# Shorthand for the VictoriaMetrics service options of this host; its
# `stateDir` is read below to build the backup directory path.
cfg = config.services.victoriametrics;
# Build one scrape job definition (an attribute set in Prometheus
# `scrape_configs` form).
#
#   name    -> job_name
#   path    -> metrics_path
#   port    -> port appended to every rewritten target address
#   targets -> list placed in the single static_configs entry
#
# The relabel rules copy the configured target into `__param_target`, expose
# it as the `instance` label, and finally rewrite `__address__` to
# "<target>.infra.federez.net:<port>".
mkScrapeConfig = name: path: port: targets:
  let
    # One relabel rule copying a single source label into `to`.
    rewrite = from: to: {
      source_labels = [ from ];
      target_label = to;
    };
    # `$1` is not Nix interpolation (only `${...}` is); it is left verbatim
    # for the relabeling engine -- presumably the whole source value under the
    # default relabel regex; confirm against the Prometheus relabel docs.
    addressTemplate = "$1.infra.federez.net:${toString port}";
  in {
    job_name = name;
    metrics_path = path;
    static_configs = [ { inherit targets; } ];
    relabel_configs = [
      (rewrite "__address__" "__param_target")
      (rewrite "__param_target" "instance")
      ((rewrite "__param_target" "__address__") // { replacement = addressTemplate; })
    ];
  };
# Port appended to every target address of the "node" scrape job.
nodePort = 9100;
# Port VictoriaMetrics listens on (localhost); also used to build the URLs
# vmalert is pointed at.
vmPort = 8428;
# Scrape job "node": one target per attribute name of `network.infra.nodes`,
# scraped on `/metrics` at `nodePort`.
nodesConfig =
  let
    # Only the node names are needed; the per-node values are never inspected.
    nodeNames = builtins.attrNames network.infra.nodes;
  in
  mkScrapeConfig "node" "/metrics" nodePort nodeNames;
# Severity label sets. They are handed to the imported rule files (see
# importRules) and reused as the source/target matchers of the Alertmanager
# inhibit rule, so both sides agree on the label values.
critical = { severity = "critical"; };
warning = { severity = "warning"; };
# Load a rule file and turn it into a list of alerting rules.
#
# The file at `path` is imported as a function and called with the `critical`
# and `warning` label sets; it must return an attribute set. Each attribute
# becomes one list element: its value extended with `alert = <attribute name>`
# (overriding any `alert` key the value already had).
importRules = path:
  lib.attrsets.mapAttrsToList
    (alertName: rule: rule // { alert = alertName; })
    (import path { inherit critical warning; });
in {
imports = [
../../modules/alertbot.nix
];
# Encrypted secret declaration; its resulting `.path` is passed to alertbot as
# the Matrix password file.
age.secrets.alertbot-matrix-password.file =
  ../../secrets/alertbot-matrix-password.age;
# Adds the VictoriaMetrics state directory (under /var/lib) to
# `backups.directories`.
# NOTE(review): `backups` is not defined in this file -- presumably a
# project-local module that backs up the listed paths; confirm.
backups.directories = [ "/var/lib/${cfg.stateDir}" ];
# alertbot (module imported from ../../modules/alertbot.nix): listens on a
# local port -- the Alertmanager webhook receiver below targets that port --
# and is given Matrix account and room settings.
services.alertbot = {
  enable = true;
  listenPort = 8081;

  # Matrix account and destination room.
  matrix.homeserver = "https://matrix.federez.net";
  matrix.user = "@alertbot:federez.net";
  # Password is read from the decrypted age secret rather than stored inline.
  matrix.passwordFile = config.age.secrets.alertbot-matrix-password.path;
  matrix.roomId = "!bVyCrycmkkLXdQRquJ:federez.net";
};
# VictoriaMetrics: bound to localhost on `vmPort`, started with the extra
# `-enableTCP6` flag, and scraping the jobs listed in `scrape_configs`.
services.victoriametrics = {
  enable = true;
  listenAddress = "localhost:${toString vmPort}";
  extraOptions = [ "-enableTCP6" ];
  # Currently a single job: the per-node scrape built by `nodesConfig`.
  prometheusConfig.scrape_configs = [ nodesConfig ];
};
# vmalert: loads the rule groups from ./rules, uses the local VictoriaMetrics
# URL for its datasource and remote read/write settings, and sends
# notifications to the local Alertmanager.
services.vmalert = {
  enable = true;

  # One group per rule file; `importRules` converts each file into a rule list.
  rules.groups = [
    { name = "common"; rules = importRules ./rules/common.nix; }
    { name = "node"; rules = importRules ./rules/node.nix; }
  ];

  settings =
    let
      # Same host/port VictoriaMetrics is configured to listen on.
      victoriaUrl = "http://localhost:${toString vmPort}";
      # Port is taken from the Alertmanager module option so both stay in sync.
      alertmanagerUrl =
        "http://localhost:${toString config.services.prometheus.alertmanager.port}";
    in
    {
      "datasource.url" = victoriaUrl;
      "remoteWrite.url" = victoriaUrl;
      "remoteRead.url" = victoriaUrl;
      "notifier.url" = [ alertmanagerUrl ];
    };
};
# Alertmanager: groups alerts by name and instance, forwards everything to a
# single webhook receiver (the local alertbot), and defines one inhibit rule
# between the critical and warning severities.
services.prometheus.alertmanager = {
  enable = true;
  configuration =
    let
      # alertbot's HTTP endpoint; the port comes from its module option.
      alertbotWebhook =
        "http://localhost:${toString config.services.alertbot.listenPort}/webhook";
    in
    {
      route = {
        receiver = "webhook";
        group_by = [ "alertname" "instance" ];
        group_wait = "30s";
        group_interval = "30s";
        repeat_interval = "24h";
      };

      # Inhibition: source is the `critical` label set, target the `warning`
      # one, compared on equal alertname and instance (see the Alertmanager
      # inhibit_rule documentation for the exact muting semantics).
      inhibit_rules = [
        {
          source_match = critical;
          target_match = warning;
          equal = [ "alertname" "instance" ];
        }
      ];

      receivers = [
        {
          name = "webhook";
          webhook_configs = [
            { url = alertbotWebhook; send_resolved = true; }
          ];
        }
      ];
    };
};
}