112 lines
2.9 KiB
Nix
112 lines
2.9 KiB
Nix
{ lib, config, infra, ... }:
|
|
let
|
|
# Build one scrape job description.
#   name    -> job_name
#   path    -> metrics_path
#   port    -> rendered with toString into the rewritten scrape address
#   targets -> the target list of the single static_configs entry
mkScrapeConfig = name: path: port: targets:
  let
    # `$1` is not Nix interpolation (only `${...}` is); it reaches the relabel
    # step verbatim. NOTE(review): presumably expanded there to the matched
    # source value — confirm against the relabel documentation.
    scrapeAddress = "$1.infra.federez.net:${toString port}";

    # Relabel step that reads one label and writes another.
    copyLabel = from: to: {
      source_labels = [ from ];
      target_label = to;
    };
  in {
    job_name = name;
    metrics_path = path;
    static_configs = [ { inherit targets; } ];
    relabel_configs = [
      (copyLabel "__address__" "__param_target")
      (copyLabel "__param_target" "instance")
      (copyLabel "__param_target" "__address__" // { replacement = scrapeAddress; })
    ];
  };
|
|
# Port used for the "node" scrape job.
nodePort = 9100;
# Port used for the VictoriaMetrics listen address and the vmalert URLs.
vmPort = 8428;

# Scrape job "node": one target per attribute name of infra.nodes.
nodesConfig =
  let
    nodeNames = builtins.attrNames infra.nodes;
  in
  mkScrapeConfig "node" "/metrics" nodePort nodeNames;
|
|
# Severity label sets, passed to the imported rule files and reused by the
# Alertmanager inhibit rule.
critical.severity = "critical";
warning.severity = "warning";
|
|
# Import the rule file at `path`, calling it with { critical, warning }, and
# flatten the resulting attribute set into a list in which every entry carries
# its attribute name under `alert` (overriding any `alert` already present).
importRules = path:
  let
    ruleSet = import path { inherit critical warning; };
    withAlertName = alertName: rule: rule // { alert = alertName; };
  in
  lib.mapAttrsToList withAlertName ruleSet;
|
|
in {
|
|
# Extra module loaded alongside this one.
# NOTE(review): presumably the module that declares services.alertbot — confirm.
imports = [ ../../modules/alertbot.nix ];
|
|
|
|
# Encrypted secret file; its `.path` is what services.alertbot reads as
# matrix.passwordFile.
age.secrets.alertbot-matrix-password.file = ../../secrets/alertbot-matrix-password.age;
|
|
|
|
# Alertbot service: Matrix account, target room and listening port.
services.alertbot = {
  enable = true;
  # Also read by the Alertmanager webhook receiver to build its URL.
  listenPort = 8081;

  matrix.homeserver = "https://matrix.federez.net";
  matrix.user = "@alertbot:federez.net";
  matrix.roomId = "!bVyCrycmkkLXdQRquJ:federez.net";
  # Path of the secret declared under age.secrets, not the password itself.
  matrix.passwordFile = config.age.secrets.alertbot-matrix-password.path;
};
|
|
|
|
# VictoriaMetrics: listens on localhost:vmPort and scrapes the "node" job.
services.victoriametrics = {
  enable = true;
  listenAddress = "localhost:${toString vmPort}";
  extraOptions = [ "-enableTCP6" ];
  prometheusConfig.scrape_configs = [ nodesConfig ];
};
|
|
|
|
# vmalert: evaluates the imported rule groups; all datasource/remote URLs point
# at the VictoriaMetrics address and the notifier URL at Alertmanager.
services.vmalert =
  let
    vmUrl = "http://localhost:${toString vmPort}";
    amUrl = "http://localhost:${toString config.services.prometheus.alertmanager.port}";

    # One rule group whose rules come from the given rule file.
    mkGroup = name: file: {
      inherit name;
      rules = importRules file;
    };
  in {
    enable = true;

    rules.groups = [
      (mkGroup "common" ./rules/common.nix)
      (mkGroup "node" ./rules/node.nix)
    ];

    settings = {
      "datasource.url" = vmUrl;
      "remoteWrite.url" = vmUrl;
      "remoteRead.url" = vmUrl;
      "notifier.url" = [ amUrl ];
    };
  };
|
|
|
|
# Alertmanager: groups alerts per (alertname, instance), inhibits a warning
# when a critical alert with the same pair exists, and forwards everything to
# the alertbot webhook.
services.prometheus.alertmanager =
  let
    # Label keys used both for grouping and for the inhibit rule's `equal`.
    alertIdentity = [ "alertname" "instance" ];

    botPort = config.services.alertbot.listenPort;

    criticalInhibitsWarning = {
      source_match = critical;
      target_match = warning;
      equal = alertIdentity;
    };

    webhookReceiver = {
      name = "webhook";
      webhook_configs = [
        {
          url = "http://localhost:${toString botPort}/webhook";
          send_resolved = true;
        }
      ];
    };
  in {
    enable = true;

    configuration = {
      route = {
        receiver = webhookReceiver.name;
        group_by = alertIdentity;
        group_wait = "30s";
        group_interval = "30s";
        repeat_interval = "24h";
      };

      inhibit_rules = [ criticalInhibitsWarning ];
      receivers = [ webhookReceiver ];
    };
  };
|
|
}
|