From a64b34810d223a800086be33570ed0b40aa7ed06 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Sat, 5 Apr 2025 17:33:42 +0200 Subject: [PATCH] wip: nixpkgs versions + infra network + monitoring Signed-off-by: Jeltz --- hive.nix | 82 +++--- modules/alertbot.nix | 86 ++++++ network/default.nix | 11 + network/infra.nix | 77 ++++++ network/options.nix | 127 +++++++++ pkgs/alertbot/default.nix | 23 ++ pkgs/alertbot/src/README.md | 1 + pkgs/alertbot/src/pyproject.toml | 83 ++++++ pkgs/alertbot/src/src/alertbot/__init__.py | 0 pkgs/alertbot/src/src/alertbot/__main__.py | 180 +++++++++++++ pkgs/indico/package_lock_git.patch | 20 ++ profiles/grafana.nix | 43 ++- profiles/infra-net.nix | 217 ---------------- profiles/infra.nix | 167 ++++++++++++ profiles/monitoring/default.nix | 112 ++++++++ profiles/monitoring/rules/common.nix | 9 + profiles/monitoring/rules/node.nix | 287 +++++++++++++++++++++ profiles/netdata.nix | 88 ------- profiles/prometheus-node-exporter.nix | 10 + profiles/victoria.nix | 16 -- profiles/vogon.nix | 190 ++++---------- secrets/alertbot-matrix-password.age | 32 +++ secrets/grafana-ldap-bind-password.age | Bin 0 -> 1763 bytes secrets/secrets.nix | 15 +- 24 files changed, 1363 insertions(+), 513 deletions(-) create mode 100644 modules/alertbot.nix create mode 100644 network/default.nix create mode 100644 network/infra.nix create mode 100644 network/options.nix create mode 100644 pkgs/alertbot/default.nix create mode 100644 pkgs/alertbot/src/README.md create mode 100644 pkgs/alertbot/src/pyproject.toml create mode 100644 pkgs/alertbot/src/src/alertbot/__init__.py create mode 100644 pkgs/alertbot/src/src/alertbot/__main__.py create mode 100644 pkgs/indico/package_lock_git.patch delete mode 100644 profiles/infra-net.nix create mode 100644 profiles/infra.nix create mode 100644 profiles/monitoring/default.nix create mode 100644 profiles/monitoring/rules/common.nix create mode 100644 profiles/monitoring/rules/node.nix delete mode 100644 profiles/netdata.nix create mode 100644 profiles/prometheus-node-exporter.nix delete mode 100644 profiles/victoria.nix create mode 100644 secrets/alertbot-matrix-password.age create mode 100644 secrets/grafana-ldap-bind-password.age diff --git a/hive.nix b/hive.nix index 7f0c8d7..410cf9e 100644 --- a/hive.nix +++ b/hive.nix @@ -1,20 +1,31 @@ let src = import ./npins; - pkgs = import src.nixpkgs { + disko = (import src.disko { inherit (nixpkgsDefault) lib; }); + diskConfig = import ./disks/ext4.nix { + # FIXME mauvaise version… + inherit (nixpkgsDefault) lib; + }; + mkSpecialArgs = nixpkgs: { + network = import ./network { + inherit (nixpkgs) lib; + }; + }; + nixpkgsDefault = import src.nixpkgs { config.permittedInsecurePackages = [ "olm-3.2.16" ]; }; - disko = (import src.disko { inherit (pkgs) lib; }); - diskConfig = import ./disks/ext4.nix { - inherit (pkgs) lib; + nixpkgs2411 = import src."nixpkgs-24.11" { }; + nodeNixpkgs = { + # FIXME discourse est cassé en unstable + pendragon = nixpkgs2411; + martagon = nixpkgs2411; }; in { meta = { - nixpkgs = pkgs; - nodeNixpkgs = { - # FIXME discourse est cassé en unstable - pendragon = src."nixpkgs-24.11"; - }; + nixpkgs = nixpkgsDefault; + nodeNixpkgs = nodeNixpkgs; + specialArgs = mkSpecialArgs nixpkgsDefault; + nodeSpecialArgs = builtins.mapAttrs (_: mkSpecialArgs) nodeNixpkgs; }; # FIXME @@ -23,6 +34,8 @@ in defaults = { pkgs, lib, ... }: { imports = [ ./profiles/sysadmin.nix + ./profiles/infra.nix + ./profiles/prometheus-node-exporter.nix #./profiles/ldap.nix "${src.agenix}/modules/age.nix" ]; @@ -38,12 +51,14 @@ in services.openssh.enable = true; networking.nftables.enable = true; + infra.enabled = true; + # Enable system diffs. system.activationScripts.system-diff = { supportsDryActivation = true; # safe: only outputs to stdout text = '' if [ -e /run/current-system ]; then - PATH=$PATH:${pkgs.nix}/bin ${pkgs.nvd}/bin/nvd diff /run/current-system $systemConfig + PATH=$PATH:${pkgs.nix}/bin ${pkgs.nvd}/bin/nvd diff /run/current-system $systemConfig fi ''; }; @@ -72,16 +87,10 @@ in wan-mac = "BC:24:11:5C:A4:5A"; }; - infra-net.leaf = { - mac = "BC:24:11:AC:7B:59"; - id = 12; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix ./profiles/glucagon.nix - ./profiles/infra-net.nix ./profiles/matrix-server.nix ./profiles/element.nix ./profiles/telegram-bot.nix @@ -99,16 +108,10 @@ in wan-mac = "BC:24:11:EA:6C:0B"; }; - infra-net.leaf = { - mac = "BC:24:11:5A:0F:44"; - id = 8; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix ./profiles/glucagon.nix - ./profiles/infra-net.nix ./profiles/vaultwarden.nix ]; @@ -123,16 +126,10 @@ in wan-mac = "BC:24:11:7F:19:60"; }; - infra-net.leaf = { - mac = "BC:24:11:91:61:8E"; - id = 9; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix ./profiles/glucagon.nix - ./profiles/infra-net.nix ./profiles/wayf.nix ]; @@ -147,16 +144,10 @@ in wan-mac = "BC:24:11:E3:12:4A"; }; - infra-net.leaf = { - mac = "BC:24:11:E4:C7:69"; - id = 10; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix ./profiles/glucagon.nix - ./profiles/infra-net.nix ./profiles/gitlab.nix ]; @@ -171,16 +162,10 @@ in wan-mac = "BC:24:11:C2:AA:47"; }; - infra-net.leaf = { - mac = "BC:24:11:31:B8:DD"; - id = 11; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix ./profiles/glucagon.nix - ./profiles/infra-net.nix ./profiles/discourse.nix ]; @@ -195,23 +180,16 @@ in wan-mac = "BC:24:11:04:9B:51"; }; - infra-net.leaf = { - mac = "BC:24:11:09:B8:76"; - id = 17; - }; - imports = [ (disko.config diskConfig) ./profiles/vm.nix + ./profiles/glucagon.nix ./profiles/indico.nix ]; }; - martagon = { name, nodes, ... }: { + martagon = { pkgs, ... }: { deployment.tags = [ "victoria" "grafana" ]; - deployment.targetHost = "martagon.federez.net"; - federez.monitoring.apiKey = "370a181d-6b00-4c3d-af27-ca65e6e4c1b0"; - networking.hostName = name; glucagon.networking = { nibble = 236; @@ -219,9 +197,13 @@ in }; imports = [ + (disko.config diskConfig) ./profiles/vm.nix - ./profiles/victoria.nix + ./profiles/glucagon.nix + ./profiles/monitoring ./profiles/grafana.nix ]; + + system.build.diskoScript = disko.diskoScript diskConfig pkgs; }; } diff --git a/modules/alertbot.nix b/modules/alertbot.nix new file mode 100644 index 0000000..a246b39 --- /dev/null +++ b/modules/alertbot.nix @@ -0,0 +1,86 @@ +{ + config, + lib, + pkgs, + ... +}: + +let + cfg = config.services.alertbot; + alertbot = pkgs.callPackage ../pkgs/alertbot { }; + configFile = (pkgs.formats.toml { }).generate "config.yaml" { + listen_port = cfg.listenPort; + matrix = { + homeserver = cfg.matrix.homeserver; + user = cfg.matrix.user; + password_cred = "matrix-password"; + room_id = cfg.matrix.roomId; + }; + }; +in { + options.services.alertbot = { + enable = lib.mkEnableOption "alertbot"; + + listenPort = lib.mkOption { + type = lib.types.port; + description = "Listen port."; + }; + + user = lib.mkOption { + type = lib.types.str; + default = "alertbot"; + description = "User under which alertbot should run."; + }; + + group = lib.mkOption { + type = lib.types.str; + default = "alertbot"; + description = "User under which alertbot should run."; + }; + + matrix = { + homeserver = lib.mkOption { + type = lib.types.str; + description = "Homeserver URL."; + }; + + user = lib.mkOption { + type = lib.types.str; + description = "User ID."; + }; + + passwordFile = lib.mkOption { + type = lib.types.path; + description = "Password file path."; + }; + + roomId = lib.mkOption { + type = lib.types.str; + description = "Room ID."; + }; + }; + }; + + config = lib.mkIf cfg.enable { + users.users.${cfg.user} = { + isSystemUser = true; + group = cfg.group; + }; + + users.groups.${cfg.group} = { }; + + systemd.services.alertbot = { + description = "Alertbot service"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + LoadCredential = [ "matrix-password:${cfg.matrix.passwordFile}" ]; + User = cfg.user; + Group = cfg.group; + ExecStart = '' + ${lib.getExe' alertbot "alertbot"} -c ${configFile} + ''; + }; + }; + }; +} diff --git a/network/default.nix b/network/default.nix new file mode 100644 index 0000000..012e9c7 --- /dev/null +++ b/network/default.nix @@ -0,0 +1,11 @@ +{ lib, ... }: + +let + result = lib.evalModules { + modules = [ + ./options.nix + ./infra.nix + ]; + }; +in +result.config diff --git a/network/infra.nix b/network/infra.nix new file mode 100644 index 0000000..fd9c7ef --- /dev/null +++ b/network/infra.nix @@ -0,0 +1,77 @@ +{ lib, ... }: + +let + toStringFixed = k: n: lib.fixedWidthString k "0" (toString n); + mkNode = id: let + a = id / 256; + b = id - 256 * a; + suffix = "${toStringFixed 3 a}${toStringFixed 3 b}"; + suffixA = builtins.substring 0 2 suffix; + suffixB = builtins.substring 2 2 suffix; + suffixC = builtins.substring 4 2 suffix; + in { + ipv6 = "fd0a:66d3:1c19:42::${toString a}:${toString b}"; + ipv4 = "10.42.${toString a}.${toString b}"; + mac = "42:42:42:${suffixA}:${suffixB}:${suffixC}"; + id = id; + }; + mkHub = hub: hub // { + ipv6 = "fd0a:66d3:1c19:1000::${toString hub.id}"; + }; +in { + infra = { + vxlan = { + vni = 42; + port = 4789; + }; + cidr = { + hubs.ipv6 = 64; + nodes = { + ipv4 = 16; + ipv6 = 64; + }; + }; + nodes = builtins.mapAttrs (_: mkNode) { + vogon = 1; + glucagon = 2; + ronderu = 3; + dodecagon = 4; + saigon = 5; + harpagon = 6; + patagon = 7; + wagon = 8; + lagon = 9; + aragon = 10; + pendragon = 11; + estragon = 12; + carlosgon = 13; + memoragon = 14; + hendecagon = 15; + dragon = 16; + perdrigon = 17; + martagon = 18; + }; + hubs = builtins.mapAttrs (_: mkHub) { + vogon = { + id = 1; + publicKey = "d5vEJUiTFQh+MPQcU2JTaJ9lcsvzuoZkohxzeOigiVQ="; + endpoint = "193.54.193.161:51039"; + }; + glucagon = { + id = 2; + publicKey = "JfTsY3+jPTDgLDrECoSvoYs+6+GpjII0ookjhFhd5SY="; + endpoint = "89.234.162.224:51039"; + }; + ronderu = { + id = 3; + publicKey = "nOeLgmE1U6nY3UNxltQKwlID9lD7fvpEwij2XUvEGgg="; + endpoint = "137.194.12.129:51039"; + }; + saigon = { + id = 4; + publicKey = "9pGyE4+CQl+f8sFJ/Mkvp14yxDQJ0SJmGnher5Tgzjc="; + endpoint = "193.48.225.201:51039"; + }; + }; + }; +} diff --git a/network/options.nix b/network/options.nix new file mode 100644 index 0000000..b274585 --- /dev/null +++ b/network/options.nix @@ -0,0 +1,127 @@ +{ lib, ... }: +let + nodeSubmodule = lib.types.submodule { + options = { + id = lib.mkOption { + type = lib.types.ints.between 1 65535; + description = '' + Identifiant du nœud. + ''; + example = 350; + }; + ipv4 = lib.mkOption { + type = lib.types.str; + description = '' + Adresse IPv4 interne du nœud. + ''; + example = "10.42.1.94"; + }; + ipv6 = lib.mkOption { + type = lib.types.str; + description = '' + Adresse IPv6 interne du nœud. + ''; + example = "fd0a:66d3:1c19:42::1:94"; + }; + mac = lib.mkOption { + type = lib.types.str; + description = '' + Adresse MAC interne du nœud. + ''; + example = "42:42:42:00:10:94"; + }; + }; + }; + hubSubmodule = lib.types.submodule { + options = { + id = lib.mkOption { + type = lib.types.ints.between 1 254; + description = '' + Identifiant du concentrateur. + ''; + example = 12; + }; + publicKey = lib.mkOption { + type = lib.types.str; + description = '' + Clé publique du concentrateur. + ''; + example = "pn8PoOZnlT+CUjI0lyILNhj7/7TMaNr+DmWbtWyj+Bg="; + }; + endpoint = lib.mkOption { + type = lib.types.str; + description = '' + Adresse et port publics du concentrateur. + ''; + example = "1.2.3.4:54050"; + }; + ipv6 = lib.mkOption { + type = lib.types.str; + description = '' + Adresse IPv6 interne du concentrateur. + ''; + example = "fd0a:66d3:1c19:1000::12"; + }; + }; + }; +in { + options = { + infra = { + vxlan = { + vni = lib.mkOption { + type = lib.types.ints.between 1 16777215; + description = '' + Identifiant de VXLAN du réseau INFRA. + ''; + example = 42; + }; + port = lib.mkOption { + type = lib.types.port; + description = '' + Numéro de port du trafic VXLAN. + ''; + example = 4789; + }; + }; + cidr = { + nodes = { + ipv4 = lib.mkOption { + type = lib.types.ints.between 1 32; + description = '' + Taille du réseau IPv4 interne des nœuds INFRA. + ''; + example = 16; + }; + ipv6 = lib.mkOption { + type = lib.types.ints.between 1 128; + description = '' + Taille du réseau IPv6 interne des nœuds INFRA. + ''; + example = 64; + }; + }; + hubs.ipv6 = lib.mkOption { + type = lib.types.ints.between 1 128; + description = '' + Taille du réseau IPv6 interne des concentrateurs INFRA. + ''; + example = 64; + }; + }; + nodes = lib.mkOption { + type = lib.types.attrsOf nodeSubmodule; + default = { }; + description = '' + Nœuds du réseau INFRA. + ''; + }; + hubs = lib.mkOption { + type = lib.types.attrsOf hubSubmodule; + default = { }; + description = '' + Concentrateurs du réseau INFRA. + ''; + }; + }; + }; +} diff --git a/pkgs/alertbot/default.nix b/pkgs/alertbot/default.nix new file mode 100644 index 0000000..f29e4d3 --- /dev/null +++ b/pkgs/alertbot/default.nix @@ -0,0 +1,23 @@ +{ + lib, + python3, +}: + +python3.pkgs.buildPythonApplication rec { + pname = "alertbot"; + version = "1.0.0"; + pyproject = true; + + disabled = python3.pythonOlder "3.12"; + + src = ./src; + + build-system = [ python3.pkgs.hatchling ]; + + dependencies = with python3.pkgs; [ pydantic aiohttp matrix-nio jinja2 ]; + + meta = { + description = "Alertmanager Matrix Bot"; + license = lib.licenses.agpl3Only; + }; +} diff --git a/pkgs/alertbot/src/README.md b/pkgs/alertbot/src/README.md new file mode 100644 index 0000000..111e425 --- /dev/null +++ b/pkgs/alertbot/src/README.md @@ -0,0 +1 @@ +# Alertbot diff --git a/pkgs/alertbot/src/pyproject.toml b/pkgs/alertbot/src/pyproject.toml new file mode 100644 index 0000000..8e13594 --- /dev/null +++ b/pkgs/alertbot/src/pyproject.toml @@ -0,0 +1,83 @@ +[project] +name = "alertbot" +version = "1.0.0" +description = "Alertmanager Matrix Bot" +readme = "README.md" +requires-python = ">=3.13" +license = "AGPL-3.0" +authors = [ + { name = "Tom Barthe", email = "tba@federez.net" }, +] +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", +] +dependencies = [ + "aiohttp", + "pydantic >= 2.0.0", + "matrix-nio", + "jinja2", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.scripts] +alertbot = "alertbot.__main__:main" + +[tool.hatch.build.targets.wheel] +packages = ["src/alertbot"] + +[tool.mypy] +strict = true + +[tool.ruff] +line-length = 79 + +[tool.ruff.lint] +select = [ + "F", # Pyflakes + "E", # pycodestyle errors + "W", # pycodestyle warnings + "I", # isort + "N", # pep8-naming + # "D", # pydocstyle + "UP", # pyupgrade + "YTT", # flake8-2020 + "ANN", # flake8-annotations + "ASYNC", # flake8-async + "S", # flake8-bandit + "BLE", # flake8-blind-except + "A", # flake8-builtins + "C4", # flake8-comprehensions + "DTZ", # flake8-datetimez + "LOG", # flak8-logging + "G", # flak8-logging-format + "INP", # flak8-no-pep420 + "PIE", # flak8-pie + "PYI", # flak8-pyi + "Q", # flak8-quotes + "RSE", # flake8-raise + "RET", # flake8-return + "SLF", # flake8-self + "SLOT", # flake8-slots + "SIM", # flake8-simplify + "TID", # flake8-tidy-imports + "ARG", # flake8-unused-arguments + "PTH", # flake8-use-pathlib + #"TD", # flake8-todos + "FIX", # flake8-fixme + "ERA", # eradicate + "PLC", # Pylint convention + "PLE", # Pylint error + "PLR", # Pylint refactor + "PLW", # Pylint warning + #"TRY", # tryceratops + "FLY", # flynt + "PERF", # Perflint + "FURB", # refurb + "RUF", # Ruff +] +ignore = [] diff --git a/pkgs/alertbot/src/src/alertbot/__init__.py b/pkgs/alertbot/src/src/alertbot/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pkgs/alertbot/src/src/alertbot/__main__.py b/pkgs/alertbot/src/src/alertbot/__main__.py new file mode 100644 index 0000000..67f3b05 --- /dev/null +++ b/pkgs/alertbot/src/src/alertbot/__main__.py @@ -0,0 +1,180 @@ +import logging +from argparse import ArgumentParser +from asyncio import CancelledError, Queue, create_task +from contextlib import asynccontextmanager, suppress +from functools import cache +from os import environ +from pathlib import Path +from tomllib import load +from typing import Any + +from aiohttp.web import AppKey, Application, Request, Response, post, run_app +from jinja2 import Environment +from nio import AsyncClient +from pydantic import BaseModel + +TEMPLATE_TEXT = ( + "{% if status == 'resolved' %}" + "\x02\x0303RÉSOLU\x03\x02 " + "{% elif labels.severity == 'critical' %}" + "\x02\x0304CRITIQUE\x03\x02 " + "{% elif labels.severity == 'warning' %}" + "\x02\x0307ATTENTION\x03\x02 " + "{% endif %}" + "\x02{{ labels.alertname }}\x02" + "{% if labels.instance is defined %} {{ labels.instance }}{% endif %}" + "{% if annotations.summary is defined %}" + "\n{{ annotations.summary }}" + "{% else %}" + "{% for key, value in annotations.items() %}" + "\n \x02{{ key }} :\x02 {{ value }}" + "{% endfor %}" + "{% endif %}" +) + +TEMPLATE_HTML = ( + "{% if status == 'resolved' %}" + "RÉSOLU " + "{% elif labels.severity == 'critical' %}" + "@room CRITIQUE " + "{% elif labels.severity == 'warning' %}" + "ATTENTION " + "{% endif %}" + "{{ labels.alertname }}" + "{% if labels.instance is defined %} {{ labels.instance }}{% endif %}" + "{% if annotations.summary is defined %}" + "
{{ annotations.summary }}
" + "{% else %}" + "
" + "
" + "{% for key, value in annotations.items() %}" + "{{ key | capitalize }} : {{ value }}
" + "{% endfor %}" + "" + "
" + "{% endif %}" +) + + +class MatrixConfig(BaseModel): + homeserver: str + user: str + password_cred: str + room_id: str + + +class Config(BaseModel): + matrix: MatrixConfig + listen_port: int + + +alert_queue = AppKey("alert_queue", Queue[Any]) +config = AppKey("config", Config) + + +@cache +def read_cred(name: str) -> str: + creds_dir = Path(environ["CREDENTIALS_DIRECTORY"]) + return (creds_dir / name).read_text() + + +async def handle_webhook(request: Request) -> Response: + message = await request.json() + alerts = message.get("alerts", []) + logging.info("Incoming message received: %s", message) + + for alert in alerts: + await request.app[alert_queue].put(alert) + + return Response() + + +async def post_alerts( + client: AsyncClient, queue: Queue[Any], room_id: str +) -> None: + env = Environment(autoescape=True) + text = env.from_string(TEMPLATE_TEXT) + html = env.from_string(TEMPLATE_HTML) + + while True: + alert = await queue.get() + logging.info("Posting alert: %s", alert) + try: + await client.room_send( + room_id, + message_type="m.room.message", + content={ + "msgtype": "m.text", + "format": "org.matrix.custom.html", + "body": text.render(**alert), + "formatted_body": html.render(**alert), + }, + ) + except Exception: + logging.exception("Error while posting alert") + + +@asynccontextmanager +async def make_matrix_client( + homeserver: str, user: str, password: str, device_name: str +) -> AsyncClient: + client = AsyncClient(homeserver, user) + try: + logging.info("Logging in to %s as %s", homeserver, user) + await client.login(password, device_name=device_name) + yield client + finally: + logging.info("Closing matrix client session") + await client.close() + + +async def message_ctx_cleanup(app: Application) -> None: + homeserver = app[config].matrix.homeserver + user = app[config].matrix.user + password = read_cred(app[config].matrix.password_cred) + queue = Queue() + + async with make_matrix_client( + homeserver, user, password, "Alertbot" + ) as client: + task = create_task( + post_alerts(client, queue, app[config].matrix.room_id) + ) + app[alert_queue] = queue + logging.info("Post alerts task created") + + yield + + logging.info("Cancelling post alert task") + task.cancel() + with suppress(CancelledError): + await task + + +def main() -> None: + logging.basicConfig(level=logging.INFO) + + parser = ArgumentParser() + parser.add_argument( + "-c", + "--config", + type=Path, + default="config.toml", + ) + args = parser.parse_args() + + app = Application() + + with args.config.open("rb") as f: + app[config] = Config.model_validate(load(f)) + + app.add_routes([post("/webhook", handle_webhook)]) + app.cleanup_ctx.append(message_ctx_cleanup) + + port = app[config].listen_port + logging.info("Starting Alertbot on port %s", port) + run_app(app, port=port, handler_cancellation=True) + + +if __name__ == "__main__": + main() diff --git a/pkgs/indico/package_lock_git.patch b/pkgs/indico/package_lock_git.patch new file mode 100644 index 0000000..262b659 --- /dev/null +++ b/pkgs/indico/package_lock_git.patch @@ -0,0 +1,20 @@ +--- a/package-lock.json 2025-02-16 07:50:02.223758771 +0100 ++++ b/package-lock.json 2025-02-16 07:50:54.208768359 +0100 +@@ -57,7 +57,7 @@ + "process": "^0.11.10", + "prop-types": "^15.8.1", + "qs": "^6.11.0", +- "qtip2": "git+https://indico@github.com/indico/qTip2.git#8951e5538a5c0833021b2d2b5d8a587a2c24faae", ++ "qtip2": "file://@qTip2Tarball@", + "rc-time-picker": "^3.7.3", + "react": "^17.0.2", + "react-charts": "2.0.0-beta.7", +@@ -14265,7 +14265,7 @@ + }, + "node_modules/qtip2": { + "version": "3.0.3", +- "resolved": "git+https://indico@github.com/indico/qTip2.git#8951e5538a5c0833021b2d2b5d8a587a2c24faae", ++ "resolved": "file://@qTip2Tarball@", + "integrity": "sha512-U/oUhSv0FpWevgmJFbv4g2+Gl4HKcl4MmnlRSbuKVlgD+fu77Pzstw2FMOwKQMwXYmjiYJ6cMn638s6WiGuRqA==", + "dependencies": { + "imagesloaded": ">=3.0.0", diff --git a/profiles/grafana.nix b/profiles/grafana.nix index 6d6fb2c..6657b09 100644 --- a/profiles/grafana.nix +++ b/profiles/grafana.nix @@ -2,6 +2,36 @@ let cfg = config.services.grafana; fileProvider = path: "$__file{${path}}"; + ldapServer = { + host = "ldap.federez.net ldap-ro.federez.net"; + port = 636; + use_ssl = true; + start_tls = false; + bind_dn = "cn=grafana,ou=service-users,dc=federez,dc=net"; + bind_password = fileProvider config.age.secrets.grafana-ldap-bind-password.path; + search_filter = "(&(objectClass=posixAccount)(cn=%s))"; + search_base_dns = [ "cn=Utilisateurs,dc=federez,dc=net" ]; + group_search_base_dns = [ "ou=posix,ou=groups,dc=federez,dc=net" ]; + group_search_filter = "(&(objectClass=posixGroup)(memberUid=%s))"; + group_search_filter_user_attribute = "uid"; + attributes = { + email = "mail"; + }; + "group_mappings" = [ + { + group_dn = "cn=sudoldap,ou=posix,ou=groups,dc=federez,dc=net"; + org_role = "Admin"; + grafana_admin = true; + } + { + group_dn = "*"; + org_role = "Viewer"; + } + ]; + }; + ldapConfig = (pkgs.formats.toml {}).generate "ldap.toml" { + servers = [ ldapServer ]; + }; in { age.secrets = { grafana-admin-password = { @@ -14,6 +44,11 @@ in { owner = "grafana"; group = "grafana"; }; + grafana-ldap-bind-password = { + file = ../secrets/grafana-ldap-bind-password.age; + owner = "grafana"; + group = "grafana"; + }; }; services.grafana = { @@ -30,6 +65,12 @@ in { admin_password = fileProvider config.age.secrets.grafana-admin-password.path; secret_key = fileProvider config.age.secrets.grafana-secret-key.path; }; + "auth.ldap" = { + enabled = true; + allow_sign_up = true; + skip_org_role_sync = false; + config_file = toString ldapConfig; + }; }; declarativePlugins = lib.mkIf config.services.victoriametrics.enable @@ -42,7 +83,7 @@ in { name = "VictoriaMetrics"; type = "victoriametrics-metrics-datasource"; uid = "vm"; - url = "http://localhost:8248"; + url = "http://localhost:8428"; editable = false; jsonData = { isDefault = true; diff --git a/profiles/infra-net.nix b/profiles/infra-net.nix deleted file mode 100644 index a15ee67..0000000 --- a/profiles/infra-net.nix +++ /dev/null @@ -1,217 +0,0 @@ -{ config, lib, ... }: -let - inherit (lib) mkOption types; - cfg = config.infra-net; - leafSubmodule = lib.types.submodule { - options = { - mac = mkOption { - type = types.str; - description = '' - Adresse MAC de l'interface préexistante sur le réseau INFRA. - ''; - example = "AA:BB:CC:DD:EE:FF"; - }; - id = mkOption { - type = types.ints.between 1 65535; - description = '' - Identifiant de la machine dans le réseau INFRA. - ''; - example = 194; - }; - }; - }; - hubDefSubmodule = lib.type.submodule { - options = { - hid = mkOption { - type = types.ints.between 1 255; - description = '' - Identifiant du concentrateur sur la maille WireGuard. - ''; - example = 12; - }; - public-key = mkOption { - type = types.str; - description = '' - Clé publique WireGuard du concentrateur. - ''; - example = "LwhiJgtHtYQT4Ug6tgD0RDlUhhNga5tIyiWN2A6dCnk="; - }; - address = mkOption { - type = types.str; - description = '' - Adresse IP publique du concentrateur. - ''; - example = "1.2.3.4"; - }; - port = mkOption { - type = types.port; - description = '' - Port WireGuard public du concentrateur. - ''; - default = 51039; - example = 51039; - }; - }; - }; - hubSubmodule = lib.types.submodule { - options = { - name = mkOption { - type = types.str; - description = '' - Nom d'hôte du concentrateur. - ''; - default = config.networking.hostName; - }; - all-hubs = mkOption { - type = types.attrsOf hubDefSubmodule; - description = '' - Définitions de l'ensemble des concentrateurs. - ''; - }; - private-key-path = mkOption { - type = types.path; - description = '' - Chemin vers la clé privée WireGuard du concentrateur. - ''; - }; - wg-port = mkOption { - type = types.port; - description = '' - Port d'écoute WireGuard du concentrateur. - ''; - default = 51039; - example = 51039; - }; - id = mkOption { - type = types.ints.between 1 65535; - description = '' - Identifiant de la machine dans le réseau INFRA. - ''; - example = 194; - }; - mac = mkOption { - type = types.str; - description = '' - Adresse MAC de l'interface virtuelle à du concentrateur sur - le réseau INFRA. - ''; - example = "AA:BB:CC:DD:EE:FF"; - }; - }; - }; - mkAddresses = id: let - a = id / 256; - b = id - 256 * a; - in [ - "fd0a:66d3:1c19:42::${toString a}:${toString b}/64" - "10.42.${toString a}.${toString b}/16" - ]; - mkHubAddress = hub: "fd0a:66d3:1c19:1000::${toString hub.hid}"; - mkPeer = hub: { - PublicKey = hub.public-key; - Endpoint = "${hub.address}:${hub.port}"; - AllowedIPs = mkHubAddress hub; - }; - vxlanPort = 4789; - vni = 42; - selfHub = cfg.hub.all-hubs."${cfg.hub.name}"; - otherHubs = lib.filterAttrs (n: _: n != cfg.hub.name) cfg.hub.all-hubs; - mkBridgeFDB = hub: { - MACAddress = "00:00:00:00:00:00"; - Destination = "${mkHubAddress hub}"; - VNI = vni; - }; -in { - options.infra-net = { - leaf = mkOption { - type = types.nullOr leafSubmodule; - default = null; - description = '' - Configuration de l'interface d'une feuille du réseau INFRA. - ''; - }; - hub = lib.mkOption { - type = lib.types.nullOr hubSubmodule; - default = null; - description = '' - Configuration des interfaces d'un concentrateur du réseau INFRA. - ''; - }; - }; - - config = let - hubNetwork = { - links = { - "10-wg-infra" = { - netdevConfig = { - Name = "wg-infra"; - Kind = "wireguard"; - }; - wireguardConfig = { - ListenPort = cfg.hub.wg-port; - PrivateKey = "@wg-infra-key"; - }; - wireguardPeers = map mkPeer otherHubs; - }; - "10-vxl-infra" = { - netdevConfig = { - Name = "vxl-infra"; - Kind = "vxlan"; - }; - vxlanConfig = { - Local = mkHubAddress selfHub; - VNI = vni; - MacLearning = true; - DestinationPort = vxlanPort; - }; - }; - "10-br-infra".netdevConfig = { - Name = "br-infra"; - Kind = "bridge"; - MACAddress = cfg.hub.mac; - }; - }; - networks = { - "10-wg-infra" = { - matchConfig.Name = "wg-infra"; - networkConfig = { - Address = "${mkHubAddress selfHub}/64"; - VXLAN = "vxl-infra"; - }; - }; - "10-vxl-infra" = { - matchConfig.Name = "vxl-infra"; - networkConfig = { - LinkLocalAddressing = false; - Bridge = "br-infra"; - }; - bridgeFDBs = map mkBridgeFDB otherHubs; - - }; - "10-br-infra" = { - matchConfig.Name = "br-infra"; - address = mkAddresses cfg.hub.id; - }; - }; - }; - leafNetwork = { - links."10-infra" = { - matchConfig.MACAddress = cfg.leaf.mac; - linkConfig.Name = "infra"; - }; - networks."10-infra" = { - matchConfig.Name = "infra"; - address = mkAddresses cfg.leaf.id; - }; - }; - in { - systemd.network = lib.mkMerge [ - (lib.mkIf (cfg.hub != null) hubNetwork) - (lib.mkIf (cfg.leaf != null) leafNetwork) - ]; - - systemd.services.systemd-networkd.serviceConfig.LoadCredential = - lib.mkIf (cfg.hub != null) - [ "wg-infra-key:${cfg.hub.private-key-path}" ]; - }; -} diff --git a/profiles/infra.nix b/profiles/infra.nix new file mode 100644 index 0000000..b630efd --- /dev/null +++ b/profiles/infra.nix @@ -0,0 +1,167 @@ +{ config, lib, pkgs, network, name, ... }: +let + cfg = config.infra; + node = network.infra.nodes.${name}; + hub = network.infra.hubs.${name}; + isHub = cfg.hub != null; + address = [ + "${node.ipv4}/${toString network.infra.cidr.nodes.ipv4}" + "${node.ipv6}/${toString network.infra.cidr.nodes.ipv6}" + ]; + otherHubs = let + filtered = lib.filterAttrs (n: _: n != name) network.infra.hubs; + in lib.attrValues filtered; + mkBridgeFDB = hub: { + MACAddress = "00:00:00:00:00:00"; + Destination = "${hub.ipv6}"; + VNI = network.infra.vxlan.vni; + }; + mkPeer = hub: { + PublicKey = hub.publicKey; + Endpoint = hub.endpoint; + PersistentKeepalive = 25; + AllowedIPs = [ "${hub.ipv6}" ]; + }; + iface = if isHub then "br-infra" else "infra"; + hubNetwork = { + netdevs = { + "10-wg-infra" = { + netdevConfig = { + Name = "wg-infra"; + Kind = "wireguard"; + }; + wireguardConfig = { + ListenPort = cfg.hub.wireguardPort; + PrivateKey = "@wg-infra-key"; + }; + wireguardPeers = map mkPeer otherHubs; + }; + "10-vxl-infra" = { + netdevConfig = { + Name = "vxl-infra"; + Kind = "vxlan"; + }; + vxlanConfig = { + Local = hub.ipv6; + VNI = network.infra.vxlan.vni; + MacLearning = true; + DestinationPort = network.infra.vxlan.port; + }; + }; + "10-br-infra".netdevConfig = { + Name = "br-infra"; + Kind = "bridge"; + }; + }; + networks = { + "10-wg-infra" = { + matchConfig.Name = "wg-infra"; + networkConfig = { + Address = "${hub.ipv6}/${toString network.infra.cidr.hubs.ipv6}"; + VXLAN = "vxl-infra"; + }; + }; + "10-vxl-infra" = { + matchConfig.Name = "vxl-infra"; + networkConfig = { + LinkLocalAddressing = false; + Bridge = "br-infra"; + }; + bridgeFDBs = map mkBridgeFDB otherHubs; + }; + "10-br-infra" = { + matchConfig.Name = "br-infra"; + linkConfig.MACAddress = node.mac; + address = address; + }; + }; + }; + leafNetwork = { + links."10-infra" = { + matchConfig.MACAddress = node.mac; + linkConfig.Name = "infra"; + }; + networks."10-infra" = { + matchConfig.Name = "infra"; + address = address; + }; + }; + hubFirewall = { + wg-infra.allowedUDPPorts = [ network.infra.vxlan.port ]; + br-infra = { + allowedTCPPorts = cfg.firewall.allowedTCPPorts; + allowedUDPPorts = cfg.firewall.allowedUDPPorts; + }; + }; + leafFirewall.infra = { + allowedTCPPorts = cfg.firewall.allowedTCPPorts; + allowedUDPPorts = cfg.firewall.allowedUDPPorts; + }; +in { + options.infra = { + enabled = lib.mkEnableOption "Réseau INFRA"; + hub = lib.mkOption { + type = lib.types.nullOr (lib.types.submodule { + options = { + privateKeyPath = lib.mkOption { + type = lib.types.path; + description = '' + Chemin vers la clé privée WireGuard du concentrateur. + ''; + }; + wireguardPort = lib.mkOption { + type = lib.types.port; + description = '' + Port d'écoute WireGuard du concentrateur. + ''; + default = 51039; + example = 51039; + }; + }; + }); + default = null; + description = '' + Configuration d'un concentrateur du réseau INFRA. + ''; + }; + firewall = { + allowedTCPPorts = lib.mkOption { + type = lib.types.listOf lib.types.port; + default = [ ]; + example = [ 443 9100 ]; + description = '' + Ports TCP autorisés sur le réseau INFRA. + ''; + }; + allowedUDPPorts = lib.mkOption { + type = lib.types.listOf lib.types.port; + default = [ ]; + example = [ 53 ]; + description = '' + Ports UDP autorisés sur le réseau INFRA. + ''; + }; + }; + }; + + config = lib.mkIf cfg.enabled { + systemd.network = lib.mkMerge [ + (lib.mkIf isHub hubNetwork) + (lib.mkIf (!isHub) leafNetwork) + ]; + + environment.systemPackages = lib.mkIf isHub [ + pkgs.wireguard-tools + ]; + + networking.firewall.interfaces = lib.mkMerge [ + (lib.mkIf isHub hubFirewall) + (lib.mkIf (!isHub) leafFirewall) + ]; + + systemd.services.systemd-networkd = { + serviceConfig.LoadCredential = + lib.mkIf isHub [ "wg-infra-key:${cfg.hub.privateKeyPath}" ]; + }; + }; +} diff --git a/profiles/monitoring/default.nix b/profiles/monitoring/default.nix new file mode 100644 index 0000000..22c9cbe --- /dev/null +++ b/profiles/monitoring/default.nix @@ -0,0 +1,112 @@ +{ lib, config, infra, ... }: +let + mkScrapeConfig = name: path: port: targets: { + job_name = name; + metrics_path = path; + static_configs = [ { targets = targets; } ]; + relabel_configs = [ + { source_labels = [ "__address__"]; target_label = "__param_target"; } + { source_labels = [ "__param_target"]; target_label = "instance"; } + { + source_labels = [ "__param_target"]; + target_label = "__address__"; + replacement = "$1.infra.federez.net:${toString port}"; + } + ]; + }; + nodePort = 9100; + vmPort = 8428; + nodesConfig = mkScrapeConfig "node" "/metrics" nodePort + (lib.attrsets.mapAttrsToList (n: _: n) infra.nodes); + critical = { severity = "critical"; }; + warning = { severity = "warning"; }; + importRules = path: let + attrs = import path { inherit critical warning; }; + in lib.attrsets.mapAttrsToList (n: a: a // { alert = n; }) attrs; +in { + imports = [ + ../../modules/alertbot.nix + ]; + + age.secrets.alertbot-matrix-password = { + file = ../../secrets/alertbot-matrix-password.age; + }; + + services.alertbot = { + enable = true; + listenPort = 8081; + matrix = { + homeserver = "https://matrix.federez.net"; + user = "@alertbot:federez.net"; + passwordFile = config.age.secrets.alertbot-matrix-password.path; + roomId = "!bVyCrycmkkLXdQRquJ:federez.net"; + }; + }; + + services.victoriametrics = { + enable = true; + extraOptions = [ "-enableTCP6" ]; + listenAddress = "localhost:${toString vmPort}"; + prometheusConfig = { + scrape_configs = [ nodesConfig ]; + }; + }; + + services.vmalert = { + enable = true; + rules = { + groups = [ + { + name = "common"; + rules = importRules ./rules/common.nix; + } + { + name = "node"; + rules = importRules ./rules/node.nix; + } + ]; + }; + settings = let + vmUrl = "http://localhost:${toString vmPort}"; + amUrl = "http://localhost:${toString config.services.prometheus.alertmanager.port}"; + in { + "datasource.url" = vmUrl; + "remoteWrite.url" = vmUrl; + "remoteRead.url" = vmUrl; + "notifier.url" = [ amUrl ]; + }; + }; + + services.prometheus.alertmanager = { + enable = true; + configuration = { + route = { + group_by = [ "alertname" "instance" ]; + group_wait = "30s"; + group_interval = "30s"; + repeat_interval = "24h"; + receiver = "webhook"; + }; + inhibit_rules = [ + { + source_match = critical; + target_match = warning; + equal = [ "alertname" "instance" ]; + } + ]; + receivers = [ + { + name = "webhook"; + webhook_configs = let + port = config.services.alertbot.listenPort; + in [ + { + url = "http://localhost:${toString port}/webhook"; + send_resolved = true; + } + ]; + } + ]; + }; + }; +} diff --git a/profiles/monitoring/rules/common.nix b/profiles/monitoring/rules/common.nix new file mode 100644 index 0000000..2fc9004 --- /dev/null +++ b/profiles/monitoring/rules/common.nix @@ -0,0 +1,9 @@ +{ critical, ... }: +{ + CommonTargetMissing = { + expr = "up == 0"; + for = "3m"; + labels = critical; + annotations.Job = "{{ $labels.job }}"; + }; +} diff --git a/profiles/monitoring/rules/node.nix b/profiles/monitoring/rules/node.nix new file mode 100644 index 0000000..968a1d6 --- /dev/null +++ b/profiles/monitoring/rules/node.nix @@ -0,0 +1,287 @@ +{ critical, warning, ... }: +{ + # Memory + + NodeOutOfMemory = { + expr = '' + (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) < 0.1 + ''; + for = "3m"; + labels = critical; + annotations.Available = "{{ $value | humanizePercentage }}"; + }; + + NodeUnderMemoryPressure = { + expr = "rate(node_vmstat_pgmajfault[5m]) > 1000"; + labels = critical; + for = "0m"; + annotations.Pressure = "{{ $value | humanize }}"; + }; + + NodeSwapIsFillingUp = { + expr = '' + (1 - (node_memory_SwapFree_bytes + / node_memory_SwapTotal_bytes)) > 0.5 + ''; + for = "1m"; + labels = critical; + annotations.UsedSwap = "{{ $value | humanizePercentage }}"; + }; + + NodeOomKillDetected = { + expr = "increase(node_vmstat_oom_kill[1m]) > 0"; + for = "0m"; + labels = critical; + }; + + # CPU + + NodeCpuUsage = { + expr = '' + (avg by (instance) + (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > 0.8 + ''; + for = "10m"; + labels = warning; + annotations.AverageUsage = "{{ $value | humanizePercentage }}"; + }; + + NodeCpuStealNoisyNeighbor = { + expr = '' + avg by (instance) ( + rate(node_cpu_seconds_total{mode="steal"}[2m]) + ) > 0.1 + ''; + for = "10m"; + labels = warning; + annotations.Steal = "{{ $value | humanizePercentage }}"; + }; + + # Network + + NodeLinkHighUsageIn = { + expr = '' + (rate(node_network_receive_bytes_total[5m]) + / on(instance, device) node_network_speed_bytes) > .80 + ''; + labels = warning; + for = "3m"; + annotations = { + Usage = "{{ $value | humanizePercentage }}"; + Device = "{{ $labels.device }}"; + }; + }; + + NodeLinkHighUsageOut = { + expr = '' + (rate(node_network_transmit_bytes_total[5m]) + / on(instance, device) node_network_speed_bytes) > .80 + ''; + labels = warning; + for = "3m"; + annotations = { + Usage = "{{ $value | humanizePercentage }}"; + Device = "{{ $labels.device }}"; + }; + }; + + NodeConntrackLimit = { + expr = '' + (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.8 + ''; + for = "5m"; + labels = warning; + annotations.Filled = "{{ $value | humanizePercentage }}"; + }; + + NodeNetworkReceiveErrors = { + expr = '' + rate(node_network_receive_errs_total[2m]) + / rate(node_network_receive_packets_total[2m]) > 0.01 + ''; + for = "2m"; + labels = warning; + annotations = { + Errors = "{{ $value | humanizePercentage }}"; + Device = "{{ $labels.device }}"; + }; + }; + + NodeNetworkTransmitErrors = { + expr = '' + rate(node_network_transmit_errs_total[2m]) + / rate(node_network_transmit_packets_total[2m]) > 0.01 + ''; + for = "2m"; + labels = warning; + annotations = { + Errors = "{{ $value | humanizePercentage }}"; + Device = "{{ $labels.device }}"; + }; + }; + + NodeNetworkBondDegraded = { + expr = "node_bonding_active - node_bonding_slaves != 0"; + for = "2m"; + labels = warning; + annotations.Device = "{{ $labels.device }}"; + }; + + # Temperature + + NodePhysicalComponentTooHot = { + expr = '' + node_hwmon_temp_celsius > clamp_max(79, node_hwmon_temp_max_celsius) + ''; + for = "0m"; + labels = critical; + annotations = { + Temperature = "{{ $value | humanize }} °C"; + Chip = "{{ $labels.chip }}"; + Sensor = "{{ $labels.sensor }}"; + }; + }; + + NodeNodeOvertemperatureAlarm = { + expr = "node_hwmon_temp_crit_alarm_celsius == 1"; + for = "0m"; + labels = critical; + annotations = { + Chip = "{{ $labels.chip }}"; + Sensor = "{{ $labels.sensor }}"; + }; + }; + + # Storage and disks + + NodeRaidArrayGotInactive = { + expr = '' + node_md_state{state="inactive"} > 0 + ''; + for = "0m"; + labels = critical; + annotations = { + Device = "{{ $labels.device }}"; + }; + }; + + NodeRaidDiskFailure = { + expr = '' + node_md_disks{state="failed"} > 0 + ''; + for = "0m"; + labels = critical; + annotations = { + Device = "{{ $labels.md_device }}"; + }; + }; + + NodeOutOfDiskSpace = { + expr = '' + (node_filesystem_free_bytes / node_filesystem_size_bytes < 0.1) + and on (instance, device, mountpoint) (node_filesystem_readonly) == 0 + ''; + for = "5m"; + labels = critical; + annotations = { + Mountpoint = "{{ $labels.mountpoint }}"; + FreeSpace = "{{ $value | humanizePercentage }}"; + }; + }; + + NodeOutOfInodes = { + expr = "node_filesystem_files_free / node_filesystem_files < 0.1"; + for = "3m"; + labels = critical; + annotations = { + Mountpoint = "{{ $labels.mountpoint }}"; + FreeInodes = "{{ $value | humanizePercentage }}"; + }; + }; + + NodeUnhealthyDisk = { + expr = "smartmon_device_smart_healthy < 1"; + for = "10m"; + labels = critical; + annotations.Disk = "{{ $labels.disk }}"; + }; + + NodeZfsWrongState = { + expr = '' + node_zfs_zpool_state{state!="online"} > 0 + ''; + for = "5m"; + labels = critical; + annotations = { + State = "{{ $labels.state }}"; + ZPool = "{{ $labels.zpool }}"; + }; + }; + + # Clock + + NodeClockSkew = { + expr = '' + (node_timex_offset_seconds > 0.05 + and deriv(node_timex_offset_seconds[5m]) >= 0) + or (node_timex_offset_seconds < -0.05 + and deriv(node_timex_offset_seconds[5m]) <= 0) + ''; + for = "2m"; + labels = warning; + }; + + NodeClockNotSynchronising = { + expr = '' + min_over_time(node_timex_sync_status[1m]) == 0 + and node_timex_maxerror_seconds >= 16 + ''; + for = "2m"; + labels = warning; + }; + + # Misc + + NodeLoad5Usage = { + expr = '' + node_load5 / ( + count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.0 + ''; + for = "1m"; + labels = warning; + annotations.Load5PerCore = "{{ $value | humanize }}"; + }; + + NodeSystemdServiceFailed = { + expr = '' + node_systemd_unit_state{state="failed"} == 1 + ''; + for = "5m"; + labels = warning; + annotations.Service = "{{ $labels.name }}"; + }; + + NodeRequiresReboot = { + expr = "node_reboot_required > 0"; + for = "5m"; + labels = warning; + }; + + NodeEdacCorrectableErrorsDetected = { + expr = '' + increase(node_edac_correctable_errors_total[1m]) > 0 + ''; + for = "0m"; + labels = warning; + annotations.CorrectedErrors = "{{ $value }}"; + }; + + NodeEdacUncorrectableErrorsDetected = { + expr = '' + increase(node_edac_uncorrectable_errors_total[1m]) > 0 + ''; + for = "0m"; + labels = critical; + annotations.DetectedErrors = "{{ $value }}"; + }; +} diff --git a/profiles/netdata.nix b/profiles/netdata.nix deleted file mode 100644 index ad51573..0000000 --- a/profiles/netdata.nix +++ /dev/null @@ -1,88 +0,0 @@ -{ nodes, pkgs, lib, ... }: -let - mkChildNode = apiKey: allowFrom: '' - [${apiKey}] - enabled = yes - default history = 5000 - default memory mode = dbengine - health enabled by default = auto - allow from = ${allowFrom} - ''; - isMonitorableChild = s: lib.hasAttrByPath [ "config" "federez" "monitoring" "apiKey" ] s && s.config.federez.monitoring.apiKey != null; - filterMonitorableChildren = lib.filterAttrs (_: isMonitorableChild); - monitorableChildren = filterMonitorableChildren nodes; - streamingChildren = lib.mapAttrsToList (name: peer: '' - # ${name} - ${mkChildNode peer.config.federez.monitoring.apiKey "*"} - '') monitorableChildren; -in - { - # I wish it could be truly reproducible, but it cannot because of the access token secret. - environment.etc."netdata/health_alarm_notify.conf".enable = false; - environment.etc."netdata/health_alarm_notify.conf".source = pkgs.writeText "health_alarm_notify.conf" '' - SEND_MATRIX="YES" - MATRIX_HOMESERVER="https://matrix.federez.net" - MATRIX_ACCESSTOKEN="XXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - DEFAULT_RECIPIENT_MATRIX="!vdYmGGkFFxIRklSLcO:federez.net" - ''; - - services.netdata = { - enable = true; - package = pkgs.netdataCloud; - config = { - global = { - "access log" = "none"; - "disconnect idle web clients after seconds" = 3600; - "enable web responses gzip compression" = "no"; - "errors to trigger flood protection" = 8000; - "dbengine multihost disk space" = 4 * 1024; # 8GiB - "page cache size" = 1024; # 1GiB - }; - db = { - mode = "dbengine"; - "update every" = 5; - "storage tiers" = 3; - "dbengine multihost disk space MB" = 4 * 1024; # 4GiB - "dbengine tier 1 multihost disk space MB" = 2 * 1024; # 2GiB - "dbengine tier 2 multihost disk space MB" = 1 * 1024; # 1GiB - }; - web = { - # "bind to" = "127.0.0.1 0.0.0.0 unix:/run/netdata/netdata.sock"; - # "allow connections from" = "localhost 127.0.0.1 0.0.0.0"; - # "allow dashboard from" = "localhost 127.0.0.1 0.0.0.0"; - # "allow management from" = "localhost 127.0.0.1"; - "allow streaming from" = "89.234.162.*"; - "allow connections by dns" = "no"; - "allow dashboard by dns" = "no"; - "allow badges by dns" = "no"; - "allow streaming by dns" = "no"; - "allow netdata.conf by dns" = "no"; - "allow management by dns" = "no"; - }; - "[plugin:timex]" = { - "update every" = 30; - "clock synchronization state" = "yes"; - "time offset" = "yes"; - }; - - }; - configDir = { - "stream.conf" = pkgs.writeText "stream.conf" '' - [stream] - enabled = no - enable compression = yes - - # From file - ${lib.concatStringsSep "\n" streamingChildren} - ''; - - "go.d.conf" = pkgs.writeText "go.d.conf" (builtins.toJSON { - "modules"."systemdunits" = true; - }); - }; - }; - - networking.firewall.allowedTCPPorts = [ 19999 ]; - # We are not the child. - federez.monitoring.enableChild = false; -} diff --git a/profiles/prometheus-node-exporter.nix b/profiles/prometheus-node-exporter.nix new file mode 100644 index 0000000..b309bf3 --- /dev/null +++ b/profiles/prometheus-node-exporter.nix @@ -0,0 +1,10 @@ +{ network, config, name, ... }: let + port = config.services.prometheus.exporters.node.port; + node = network.infra.nodes.${name}; +in { + infra.firewall.allowedTCPPorts = [ port ]; + + services.prometheus.exporters.node = { + enable = true; + }; +} diff --git a/profiles/victoria.nix b/profiles/victoria.nix deleted file mode 100644 index 99b6d1b..0000000 --- a/profiles/victoria.nix +++ /dev/null @@ -1,16 +0,0 @@ -{ ... }: -let - mkScrapeConfig = name: targets: { - job_name = name; - static_configs = [ { targets = targets; } ]; - }; - nodesConfig = mkScrapeConfig "node" - (map (n: "${n}.federez.net:9100") [ "dodecagon" "saigon" ]); -in { - services.victoriametrics = { - enable = true; - prometheusConfig = { - scrape_configs = [ nodesConfig ]; - }; - }; -} diff --git a/profiles/vogon.nix b/profiles/vogon.nix index adf5ffe..5254432 100644 --- a/profiles/vogon.nix +++ b/profiles/vogon.nix @@ -1,19 +1,9 @@ { config, pkgs, ... }: { - age.secrets = { - vogon-wg-infra-key = { - file = ../secrets/vogon-wg-infra-key.age; - owner = "root"; - group = "root"; - }; - }; - - systemd.services.systemd-networkd.serviceConfig.LoadCredential = [ - "wg-infra-key:${config.age.secrets.vogon-wg-infra-key.path}" + imports = [ + ./infra.nix ]; - environment.systemPackages = [ pkgs.wireguard-tools ]; - # FIXME I suck. I didn't manage to configure a working ZFS rootfs with disko # It was 1 AM, and the server had to be up and running quickly, so I # partitioned the server manually @@ -64,140 +54,60 @@ "sr_mod" ]; - # FIXME - networking.firewall.trustedInterfaces = [ "wg-infra" "vxl-infra" "br-infra" ]; - - systemd.network.links = { - "10-phy1" = { - matchConfig.MACAddress = "18:66:da:75:da:04"; - linkConfig.Name = "phy1"; + systemd.network = { + links = { + "10-phy1" = { + matchConfig.MACAddress = "18:66:da:75:da:04"; + linkConfig.Name = "phy1"; + }; + "10-phy2" = { + matchConfig.MACAddress = "18:66:da:75:da:05"; + linkConfig.Name = "phy2"; + }; }; - "10-phy2" = { - matchConfig.MACAddress = "18:66:da:75:da:05"; - linkConfig.Name = "phy2"; + netdevs = { + "10-wan".netdevConfig = { + Name = "wan"; + Kind = "bridge"; + }; + "10-bond" = { + netdevConfig = { + Name = "bond"; + Kind = "bond"; + }; + bondConfig.Mode = "802.3ad"; + }; + }; + networks = { + "10-phy1" = { + matchConfig.Name = "phy1"; + networkConfig.Bond = "bond"; + }; + "10-phy2" = { + matchConfig.Name = "phy2"; + networkConfig.Bond = "bond"; + }; + "10-bond" = { + matchConfig.Name = "bond"; + networkConfig.Bridge = "wan"; + }; + "10-wan" = { + matchConfig.Name = "wan"; + address = [ "193.54.193.161/28" ]; + routes = [ { Gateway = "193.54.193.174"; } ]; + }; }; }; - systemd.network.netdevs = { - "10-wan".netdevConfig = { - Name = "wan"; - Kind = "bridge"; - }; - "10-bond" = { - netdevConfig = { - Name = "bond"; - Kind = "bond"; - }; - bondConfig.Mode = "802.3ad"; - }; - "10-br-infra".netdevConfig = { - Name = "br-infra"; - Kind = "bridge"; - }; - "10-vxl-infra" = { - netdevConfig = { - Name = "vxl-infra"; - Kind = "vxlan"; - }; - vxlanConfig = { - Local = "fd0a:66d3:1c19:1000::1"; - VNI = 42; - MacLearning = true; - DestinationPort = 4789; - }; - }; - "10-wg-infra" = { - netdevConfig = { - Name = "wg-infra"; - Kind = "wireguard"; - }; - wireguardConfig = { - ListenPort = 51039; - PrivateKey = "@wg-infra-key"; - }; - wireguardPeers = [ - { - PublicKey = "JfTsY3+jPTDgLDrECoSvoYs+6+GpjII0ookjhFhd5SY="; - Endpoint = "89.234.162.224:51039"; - AllowedIPs = [ "fd0a:66d3:1c19:1000::2" ]; - PersistentKeepalive = 10; - } - { - PublicKey = "nOeLgmE1U6nY3UNxltQKwlID9lD7fvpEwij2XUvEGgg="; - Endpoint = "137.194.12.129:51039"; - AllowedIPs = [ "fd0a:66d3:1c19:1000::3" ]; - PersistentKeepalive = 10; - } - { - PublicKey = "9pGyE4+CQl+f8sFJ/Mkvp14yxDQJ0SJmGnher5Tgzjc="; - Endpoint = "193.48.225.201:51039"; - AllowedIPs = [ "fd0a:66d3:1c19:1000::4" ]; - PersistentKeepalive = 10; - } - ]; + age.secrets = { + vogon-wg-infra-key = { + file = ../secrets/vogon-wg-infra-key.age; + owner = "root"; + group = "root"; }; }; - systemd.network.networks = { - "10-phy1" = { - matchConfig.Name = "phy1"; - networkConfig.Bond = "bond"; - }; - "10-phy2" = { - matchConfig.Name = "phy2"; - networkConfig.Bond = "bond"; - }; - "10-bond" = { - matchConfig.Name = "bond"; - networkConfig.Bridge = "wan"; - }; - "10-wan" = { - matchConfig.Name = "wan"; - address = [ "193.54.193.161/28" ]; - routes = [ - { - Gateway = "193.54.193.174"; - } - ]; - }; - "10-br-infra" = { - matchConfig.Name = "br-infra"; - linkConfig.MACAddress = "9E:D8:78:A1:CE:22"; - address = [ - "fd0a:66d3:1c19:42::1/64" - "10.42.0.1/16" - ]; - }; - "10-vxl-infra" = { - matchConfig.Name = "vxl-infra"; - networkConfig = { - Bridge = "br-infra"; - LinkLocalAddressing = false; - }; - bridgeFDBs = [ - { - MACAddress = "00:00:00:00:00:00"; - Destination = "fd0a:66d3:1c19:1000::2"; - VNI = 42; - } - { - MACAddress = "00:00:00:00:00:00"; - Destination = "fd0a:66d3:1c19:1000::3"; - VNI = 42; - } - { - MACAddress = "00:00:00:00:00:00"; - Destination = "fd0a:66d3:1c19:1000::4"; - VNI = 42; - } - ]; - }; - "10-wg-infra" = { - matchConfig.Name = "wg-infra"; - networkConfig = { - Address = "fd0a:66d3:1c19:1000::1/64"; - VXLAN = "vxl-infra"; - }; - }; + infra.hub = { + privateKeyPath = config.age.secrets.vogon-wg-infra-key.path; }; } diff --git a/secrets/alertbot-matrix-password.age b/secrets/alertbot-matrix-password.age new file mode 100644 index 0000000..ee4616c --- /dev/null +++ b/secrets/alertbot-matrix-password.age @@ -0,0 +1,32 @@ +age-encryption.org/v1 +-> ssh-ed25519 Q17h8g UnNuI2slJV5yKWNAY3AkjT6RzncFrq7yITf0Ybs0vkc +0utz42fluysTXRpbHCF837lKAT1IAcyJx8OrleEIkhk +-> ssh-rsa krWCLQ +CMmvG1Zq8SA5lOgcj4XeiTFsfoRykAPwmKcHZWjGWhaOGmFcbj5aLnqkrvW8NKNS ++QDacKZSbnH/ry6uz8r75G6LJBgKOplSkbMny3Dwyc4lv8RAxMzaRuHortFJyh9w +wwEtD0fBYu58GJZpKSzlZCvIbdWLkhIRT+bEk9mwQGZ1zMxpfLtPYMMXN1KFNexB +rG3JtvWQngJ63yuc2rwPyTk4HWeDlTFLvwsdbv7iOUjMEnCe8Rp3OkaxzsAwPL1y +bFAa5si1QJaEO3yoWmZ4ABs1DzqZSFzCUB++GKNXDqhe3YYZJ0aXKLwvmBV0NOzD +7zG5JJg87tZhUN3MqP1SmA +-> ssh-ed25519 /vwQcQ QTI2xNSPIgJi+AeTXdwKlIFZaMevUkDFVPl5/PvHExs +0GKWSEDmVcLPST6aZdoi30mpT0y1+JprK4QAz8BrsbQ +-> ssh-ed25519 0R97PA Z29kz8W91kUk/jY7nzv3KTyqHNAttCJRlt8ugRL+q10 +cqO8CY/v+KnqQAEhjrl0AnvQRuibm7FM024VTtRn4gs +-> ssh-rsa jL+Elw +kIheWDU8TpSR6q55hWfpR/Czby0T8bqUIxEeoWFQeBPri8rwYQXUUBxVSSCURKYa +Q/mlEcwggX0vyUCe3YorAhYYZLSARxMPcjFY55eN+XJpx1bG3QDvbEU2pAQRYNBF +cN9OwdM2qPCk88jg4vwlCixIuULeFDz9kmUGXaVa1+Aq5y6/2/nhV4GyTTcn7rVU +pdHc3LETGK4gTYm5DSs5o9AiC/igtVPorgF/b55Cge0wuSnsYZOi7xqwtmJKXYZC +Zu6E8tcyd5fu6p+N04szEpGISW+M6LCXVj7Os7YAc73bhy/iorTYWTbjvYPI3VVj +cD3JceB7q0QUECqH5J6DGweDKY39r5/xjAUI0f5Ohnsy3ubAa+KcFCBkPJRalg+e +nKGQmyVr7T/OUOwyRsOB5t8teOfAIZqPhGNaEhTBvjAazb/cMpegBEDzgWnG2JD/ +dJGk47r1wK9nAsBuXYQYpiFIpDf/z5WalKjAbbjZnFiGPiMklTv1YoMiPYtAOz8+ +5vTzY7DAeMPya0UPkLJGWuGx6B9SgIiLoi80zObgvJOKJ1EY+ocF7xx4zgjfgHmh +zBLWKJlduejIv2GvD26mwR6/1ilXl1e9qCoROvxZ941ohq/zaquGB1AS3iOzYaeR +qE+S5oQqM5vb1aZo+XHLxjANKgJH7XR7Oo/JI+bICEs +-> ssh-ed25519 jIXfPA oreLfN4DXX2NNGlL+ExX9s9yWd5QaoJpq8OBPhrj7RQ +q0Abvt5EJCKiikiTGlTT9bCAM0jmAmIygZsxyRr2alE +-> ssh-ed25519 um7xWA ooHpiohzbsjSagXg4qM0sBsf9D7bQo0aBiKCAGDdAX4 +M5QEUG27Ii8O2+dfeHNJmolZCaTAGOAaWtM+MXOhx+U +--- A2doBlk8ffSxIkUsukrjf94l9lLZcTLIUlSBBUMzlFg +_@Om ϑ'¥'u5Lsy_D dmuDCm9aHUt \ No newline at end of file diff --git a/secrets/grafana-ldap-bind-password.age b/secrets/grafana-ldap-bind-password.age new file mode 100644 index 0000000000000000000000000000000000000000..3508fa1c149ad4f5947aed9a9d363d9c1dce92a9 GIT binary patch literal 1763 zcmZ9MxvD(}0mkhDgH1Xy%>|1T<8`((8-g(VnKN@{U(Pw*>~m({H!M=*1%hBBqLt5} zg{=eve_Vvg9>sqG9i*XWRU&9WrP^fY)EVjAQcFip0SW!|S~=y*vs%{Af}fcEd0q)3V^4vgcu zTTMp&tysIxmQ?Z3H-kv49UoEge0`@4WAIWAKBt;Mou_EM70TjJQMebqUqkSNioFST zg>u-dB2BnCC+%qG348%{_vrE& z(>^IG;rWV(R-q}9COYqx%u_^=0S#zIcj8XLDd*AbjVtM{40m)Ia|c>;zYEmEw8cU> zm(ME7myM3dhe%nteT9(f1`2@A`Oz^dPKScfrc;wR#DvehyZxRmw5(=izJ&Bp!cDC^ zqtjLz>?3T=>IiX&Qre7-|D<+!!HEo$%@c6L!D&vr5~t`Ey(217)f`#wV0^x#_#OH|twAauv$ zg2p@;_o)dUv@o+8^&GQm&@F=D*h^h!wi@@+=)Tkb+oVDTg=z&-Rx<8^t3>y)@>piC zN=ulzw5ntZ*LFdBQpw0b2kROAo0s$Htz&kzixOf;37SVX06B*&IZc~|hH>LHn$=is zZLLNSX;N#3W>@%{IX~2tv5QSAPCIH64P{kJNv`f*Ou)R0qy<}V4g+{~XQs|wf= zG&}z^cU`_;1*M=lT|5)f-4UhLIlEWmta<1J$W)~SC{-o~kG8JE4uT|K^ML3qLA2)F z$yVxF7_EQuSCcI*`RLxvGoSRt+N#DG1Aw?eU=C+tN{g#doO6??A8nPE(k87Zt}%HK zhh2X~bt}K(V29krt!1yHEj*jPJ$W5K)gw1zPc_f*o18;p#HOxVw$hc2T;E$Lq2I4m zPiO8X6V@sA%L;eg9W(^DK5nSz0ie_-u`Ob}jvK^EDxrgWI|;QBM{s%A&J29wFVExV zWc8`Acqm0N`iU8wzC~us^9n#cs(LdZzRqHrPM)cM%Das39}A06*tyNU+{aN+v;+)w zF-UT%;zAqeIbY|dv5B$)V!Rrn(6JtZZB&6wlY&=W1}!2ed2_pFQIlMh#|wo}eG%4< z+{Yl;K7s{Cc(GcT*qrdW~t5Tz!!xJk`CVZ~T0`smK1_N)OI3Qxz^vi@#5{AI8 zd}(4|oK2*_BOxcWb9i_4kvjhOzwflUCO4mXxqy{+y(ENyrRWuedFQe!wW*>bV-y3?zyIBje*598Km6;vAAb7f z$G`gfxBtMl5B3lKEyAx~ee&0je)-3r|K`uYC;$8+(2g%Y_da~{>*V);`|;KKi}j~T F_zxC@Mx_7% literal 0 HcmV?d00001 diff --git a/secrets/secrets.nix b/secrets/secrets.nix index c360af2..3ca3941 100644 --- a/secrets/secrets.nix +++ b/secrets/secrets.nix @@ -25,7 +25,18 @@ let wg-admins = active-admins; indico-admins = active-admins; grafana-admins = active-admins; - servers = [ estragon wagon lagon klingon aragon pendragon vogon perdrigon martagon ]; + alertbot-admins = active-admins; + servers = [ + estragon + wagon + lagon + klingon + aragon + pendragon + vogon + perdrigon + martagon + ]; in { "matrix-shared-secret.age".publicKeys = [ estragon ] ++ matrix-admins; @@ -41,4 +52,6 @@ in "grafana-admin-password.age".publicKeys = [ martagon ] ++ grafana-admins; "grafana-secret-key.age".publicKeys = [ martagon ] ++ grafana-admins; "vogon-wg-infra-key.age".publicKeys = [ vogon ] ++ wg-admins; + "grafana-ldap-bind-password.age".publicKeys = [ martagon ] ++ grafana-admins; + "alertbot-matrix-password.age".publicKeys = [ martagon ] ++ alertbot-admins; }