From b8a6cbfeb5f78fa25121752a9e9fe6016b76c217 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Sat, 21 Jun 2025 23:14:39 +0200 Subject: [PATCH 1/4] bug: fix borgmatic when no pgsql + vogon wg secret path --- profiles/backups.nix | 3 +-- profiles/vogon/host.nix | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/profiles/backups.nix b/profiles/backups.nix index 7f757a6..f338a82 100644 --- a/profiles/backups.nix +++ b/profiles/backups.nix @@ -6,10 +6,9 @@ let postgresql = config.services.postgresql.package; additionalPackages = [ pkgs.coreutils - postgresql pkgs.sudo pkgs.sqlite - ]; + ] ++ lib.optionals (builtins.length cfg.postgresqlDatabases > 0) [ postgresql ]; remotes = { memoragon = { host = "memoragon.infra.federez.net"; diff --git a/profiles/vogon/host.nix b/profiles/vogon/host.nix index b11d11b..c1721b8 100644 --- a/profiles/vogon/host.nix +++ b/profiles/vogon/host.nix @@ -101,7 +101,7 @@ age.secrets = { vogon-wg-infra-key = { - file = ../secrets/vogon-wg-infra-key.age; + file = ../../secrets/vogon-wg-infra-key.age; owner = "root"; group = "root"; }; From a1c4879a38100e06a458ff9d3517851503502843 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Sat, 21 Jun 2025 23:39:03 +0200 Subject: [PATCH 2/4] sysadmin: add asyncnomi's ssh key --- profiles/sysadmin.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/profiles/sysadmin.nix b/profiles/sysadmin.nix index ddfc053..5b28f4b 100644 --- a/profiles/sysadmin.nix +++ b/profiles/sysadmin.nix @@ -4,6 +4,7 @@ ../pubkeys/bensmrs.keys ../pubkeys/tomate.keys ../pubkeys/jeltz.keys + ../pubkeys/asyncnomi.keys ]; backups.directories = [ "/root" ]; From 4e6513466f50b046ec4d2ee7101abe533797cdb0 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Sun, 6 Jul 2025 23:02:21 +0200 Subject: [PATCH 3/4] monitoring: add support for Alert list visualisation in Grafana --- profiles/grafana.nix | 4 +++- profiles/monitoring/default.nix | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/profiles/grafana.nix b/profiles/grafana.nix index 093b7c9..dfd5db0 100644 --- a/profiles/grafana.nix +++ b/profiles/grafana.nix @@ -86,7 +86,9 @@ in { [ { name = "VictoriaMetrics"; - type = "victoriametrics-metrics-datasource"; + # See https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/59 + type = "prometheus"; + #type = "victoriametrics-metrics-datasource"; uid = "vm"; url = "http://localhost:8428"; editable = false; diff --git a/profiles/monitoring/default.nix b/profiles/monitoring/default.nix index 689fe74..15e75f1 100644 --- a/profiles/monitoring/default.nix +++ b/profiles/monitoring/default.nix @@ -5,6 +5,7 @@ let alertmanagerPort = config.services.prometheus.alertmanager.port; alertbotPort = config.services.alertbot.listenPort; blackboxPort = config.services.prometheus.exporters.blackbox.port; + vmalertPort = 8880; nodePort = 9100; mkScrapeConfig = name: config: { job_name = name; @@ -30,6 +31,7 @@ let (alert: attrs: attrs // { inherit alert; }) (import path { inherit critical warning; }); }); + inactiveNodes = [ "wagon" "jargon" "lagon" ]; in { imports = [ ../../modules/alertbot.nix @@ -41,14 +43,22 @@ in { services.victoriametrics = { enable = true; - extraOptions = [ "-enableTCP6" ]; + extraOptions = [ + "-enableTCP6" + "-vmalert.proxyURL=http://localhost:${toString vmalertPort}" + ]; listenAddress = "localhost:${toString victoriametricsPort}"; prometheusConfig = { scrape_configs = mkScrapeConfigs { node = { path = "/metrics"; replacement = "$1.infra.federez.net:${toString nodePort}"; - targets = lib.attrsets.mapAttrsToList (n: _: n) network.infra.nodes; + targets = let + activeNodes = lib.filterAttrs + (n: _: !(builtins.elem n inactiveNodes)) + network.infra.nodes; + in + lib.attrsets.mapAttrsToList (n: _: n) activeNodes; }; blackbox_https_get_200 = { path = "/probe"; @@ -90,6 +100,7 @@ in { "remoteWrite.url" = victoriametricsUrl; "remoteRead.url" = victoriametricsUrl; "notifier.url" = [ alertmanagerUrl ]; + "httpListenAddr" = "localhost:${toString vmalertPort}"; }; }; From 495f51725b56b33b4f901be1dd2851a34518f812 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Sun, 6 Jul 2025 23:03:08 +0200 Subject: [PATCH 4/4] monitoring: fix typo + increase threshold for load5 alert --- profiles/monitoring/rules/node.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiles/monitoring/rules/node.nix b/profiles/monitoring/rules/node.nix index 0b62631..efafc15 100644 --- a/profiles/monitoring/rules/node.nix +++ b/profiles/monitoring/rules/node.nix @@ -131,7 +131,7 @@ NodePhysicalComponentTooHot = { expr = '' - node_hwmon_temp_celsius > clamp_max(79, node_hwmon_temp_max_celsius) + node_hwmon_temp_celsius > clamp_max(node_hwmon_temp_max_celsius, 79) ''; for = "0m"; labels = critical; @@ -265,7 +265,7 @@ NodeLoad5Usage = { expr = '' node_load5 / ( - count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.1 + count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.25 ''; for = "1m"; labels = warning;