nix/profiles/monitoring/default.nix

137 lines
4.1 KiB
Nix

{ pkgs, lib, config, network, ... }:
let
# Option subtree of the VictoriaMetrics service; its `stateDir` is used for
# the backup directory declared further down.
cfg = config.services.victoriametrics;

# Ports pinned by this profile.
victoriametricsPort = 8428;
vmalertPort = 8880;
nodePort = 9100;

# Ports read back from the options of the corresponding services, so the
# URLs built in this file follow whatever those services are configured with.
alertmanagerPort = config.services.prometheus.alertmanager.port;
blackboxPort = config.services.prometheus.exporters.blackbox.port;
alertbotPort = config.services.alertbot.listenPort;
# Builds one scrape job from its name and a job description `job`, which
# must provide `path`, `targets` and `replacement` and may provide `params`
# (defaults to an empty set).
#
# The relabelling copies each target address into `__param_target`, exposes
# it as the `instance` label, and finally rewrites `__address__` using
# `job.replacement`.
#
# The second argument is deliberately not called `config`, so that it cannot
# be confused with the NixOS module `config` argument of this file.
mkScrapeConfig = jobName: job:
  let
    # Relabel step that reads label `from` and writes label `to`.
    copyLabel = from: to: {
      source_labels = [ from ];
      target_label = to;
    };
  in {
    job_name = jobName;
    metrics_path = job.path;
    params = job.params or { };
    static_configs = [ { inherit (job) targets; } ];
    relabel_configs = [
      (copyLabel "__address__" "__param_target")
      (copyLabel "__param_target" "instance")
      (copyLabel "__param_target" "__address__" // { inherit (job) replacement; })
    ];
  };

# Turns an attribute set `{ <job name> = <job description>; ... }` into the
# list of scrape jobs, one `mkScrapeConfig` result per attribute.
mkScrapeConfigs = jobs: lib.attrsets.mapAttrsToList mkScrapeConfig jobs;
# Label sets for the two alert severities. They are handed to every rule
# file and reused by the Alertmanager inhibit rule below.
critical = { severity = "critical"; };
warning = { severity = "warning"; };

# Turns `{ <group name> = <path to rule file>; ... }` into a list of rule
# groups. Each rule file is imported as a function of `{ critical, warning }`
# and must return `{ <alert name> = <rule attributes>; ... }`; the attribute
# name becomes the rule's `alert` field.
mkRuleGroups =
  let
    severities = { inherit critical warning; };
    # One `<alert name> = <rule attributes>` entry -> rule with its `alert` set.
    toRule = alertName: ruleAttrs: ruleAttrs // { alert = alertName; };
    # One `<group name> = <rule file>` entry -> group with its list of rules.
    toGroup = groupName: ruleFile: {
      name = groupName;
      rules = lib.attrsets.mapAttrsToList toRule (import ruleFile severities);
    };
  in
  lib.attrsets.mapAttrsToList toGroup;

# Node names that are left out of the `node` scrape job.
inactiveNodes = [ "wagon" "jargon" "lagon" ];
in {
# Other modules pulled into this profile: the shared alertbot module plus the
# blackbox and alertbot configuration files that sit next to this file.
imports = [
../../modules/alertbot.nix
./blackbox.nix
./alertbot.nix
];
# Registers the VictoriaMetrics state directory (located under /var/lib) in
# `backups.directories`.
# NOTE(review): the `backups` option is not declared in this file — presumably
# it comes from another module of the repository; confirm.
backups.directories = [ "/var/lib/${cfg.stateDir}" ];
# VictoriaMetrics: listens on localhost, proxies vmalert, and scrapes two
# jobs built with `mkScrapeConfigs`.
services.victoriametrics =
  let
    # Names of all nodes in `network.infra.nodes` except those listed in
    # `inactiveNodes`.
    scrapedNodes = builtins.filter
      (nodeName: !(builtins.elem nodeName inactiveNodes))
      (builtins.attrNames network.infra.nodes);

    # URLs probed through the blackbox exporter (module `https_get_200`).
    probedUrls = [
      "https://federez.net/"
      "https://re2o.federez.net/"
      "https://gitlab2.federez.net/federez/nix"
      "https://www.federez.net/"
      "https://events.federez.net/"
      "https://wiki.federez.net/"
      "https://wiki-backup.federez.net/"
      "https://lists.federez.net/postorius/lists/"
      "https://element.federez.net/"
      "https://chat.federez.net/login"
      "https://nextcloud.federez.net/index.php/login"
      "https://watch.federez.net/"
    ];
  in {
    enable = true;
    listenAddress = "localhost:${toString victoriametricsPort}";
    extraOptions = [
      "-enableTCP6"
      "-vmalert.proxyURL=http://localhost:${toString vmalertPort}"
    ];
    prometheusConfig.scrape_configs = mkScrapeConfigs {
      # Every active node, reached at <name>.infra.federez.net:<nodePort>.
      node = {
        path = "/metrics";
        replacement = "$1.infra.federez.net:${toString nodePort}";
        targets = scrapedNodes;
      };
      # The request goes to the local blackbox exporter; the URL to probe is
      # carried by the relabelling done in `mkScrapeConfig`.
      blackbox_https_get_200 = {
        path = "/probe";
        replacement = "localhost:${toString blackboxPort}";
        params.module = [ "https_get_200" ];
        targets = probedUrls;
      };
    };
  };
# vmalert: evaluates the rule groups against the local VictoriaMetrics
# instance and sends notifications to the local Alertmanager.
services.vmalert =
  let
    victoriametricsUrl = "http://localhost:${toString victoriametricsPort}";
    alertmanagerUrl = "http://localhost:${toString alertmanagerPort}";
  in {
    enable = true;
    # One rule group per file under ./rules, named after the attribute.
    rules.groups = mkRuleGroups {
      blackbox = ./rules/blackbox.nix;
      common = ./rules/common.nix;
      node = ./rules/node.nix;
    };
    settings = {
      # Must match the port given to VictoriaMetrics' -vmalert.proxyURL.
      httpListenAddr = "localhost:${toString vmalertPort}";
      # The same VictoriaMetrics instance is used for querying, for writing
      # and for reading back.
      "datasource.url" = victoriametricsUrl;
      "remoteRead.url" = victoriametricsUrl;
      "remoteWrite.url" = victoriametricsUrl;
      "notifier.url" = [ alertmanagerUrl ];
    };
  };
# Alertmanager: groups alerts by name and instance and forwards everything to
# the alertbot webhook running on localhost.
services.prometheus.alertmanager =
  let
    # Renders a label set such as `{ severity = "critical"; }` as a list of
    # Alertmanager equality matchers (`severity="critical"`).
    # `source_match` / `target_match` are marked deprecated in the
    # Alertmanager configuration reference; `*_matchers` is the replacement
    # and expresses the same label-equality conditions.
    toMatchers = lib.attrsets.mapAttrsToList
      (label: value: "${label}=\"${value}\"");
  in {
    enable = true;
    configuration = {
      route = {
        group_by = [ "alertname" "instance" ];
        group_wait = "30s";
        group_interval = "30s";
        repeat_interval = "24h";
        receiver = "webhook";
      };
      inhibit_rules = [
        # A critical alert mutes the warning alert that has the same
        # alertname and instance.
        {
          source_matchers = toMatchers critical;
          target_matchers = toMatchers warning;
          equal = [ "alertname" "instance" ];
        }
      ];
      receivers = [
        {
          name = "webhook";
          webhook_configs = [
            {
              url = "http://localhost:${toString alertbotPort}/webhook";
              # Also notify when an alert stops firing.
              send_resolved = true;
            }
          ];
        }
      ];
    };
  };
}