diff --git a/hive.nix b/hive.nix
index 2da30aa..2f7636b 100644
--- a/hive.nix
+++ b/hive.nix
@@ -11,10 +11,11 @@ in
     nixpkgs = src.nixpkgs;
   };
 
-  defaults = { pkgs, ... }: {
+  defaults = { pkgs, lib, ... }: {
     imports = [
       ./profiles/sysadmin.nix
       ./profiles/glucagon.nix
+      ./profiles/child-netdata.nix
       "${src.agenix}/modules/age.nix"
       (disko.config diskConfig)
     ];
@@ -26,6 +27,9 @@ in
     networking.useDHCP = false;
     services.openssh.enable = true;
 
+    # By default, everyone is a child except klingon itself.
+    federez.monitoring.enableChild = lib.mkDefault true;
+
     # Enable system diffs.
     system.activationScripts.system-diff = {
       supportsDryActivation = true; # safe: only outputs to stdout
@@ -47,6 +51,7 @@ in
     deployment.tags = [ "matrix" ];
     deployment.targetHost = "estragon.federez.net";
     networking.hostName = name;
+    federez.monitoring.apiKey = "3411043d-55b5-425e-af43-0932d6147148";
 
     glucagon.networking = {
       nibble = 227;
@@ -66,6 +71,7 @@ in
     deployment.tags = [ "vaultwarden" "pass" "passwords" ];
     deployment.targetHost = "wagon.federez.net";
     networking.hostName = name;
+    federez.monitoring.apiKey = "a8bd7953-dfca-4393-b770-98c5ab11dea5";
 
     glucagon.networking = {
       nibble = 228;
@@ -82,6 +88,7 @@ in
     deployment.tags = [ "keycloak" ];
     deployment.targetHost = "lagon.federez.net";
     networking.hostName = name;
+    federez.monitoring.apiKey = "f85dcb12-970c-4ea1-99b4-01e2fc26bc6c";
 
     glucagon.networking = {
       nibble = 229;
diff --git a/profiles/child-netdata.nix b/profiles/child-netdata.nix
new file mode 100644
index 0000000..110ef58
--- /dev/null
+++ b/profiles/child-netdata.nix
@@ -0,0 +1,51 @@
+# Netdata "child" profile: declares the federez.monitoring options and, when
+# child mode is enabled, configures netdata to stream to the central node.
+{ config, pkgs, lib, ... }:
+let
+  inherit (lib) mkEnableOption mkIf types mkOption;
+  cfg = config.federez.monitoring;
+in
+{
+  options.federez.monitoring = {
+    enableChild = mkEnableOption ''child mode.
+      This makes the current instance of netdata, headless, memoryless and minimal.
+      Don't use it on the central node.
+    '' // { default = false; };
+
+    apiKey = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = "Streaming API key written to this node's netdata stream.conf.";
+    };
+  };
+
+  config = mkIf cfg.enableChild {
+    # A child needs a key to put in stream.conf: reject configurations without one.
+    assertions = [{
+      assertion = cfg.apiKey != null;
+      message = "API key must be set if this node is a child to some netdata dashboard.";
+    }];
+    services.netdata = {
+      enable = true;
+      config = {
+        global = {
+          # "memoryless": no local metrics storage on the child.
+          "memory mode" = "none";
+        };
+        web = {
+          # "headless": no local web server on the child.
+          mode = "none";
+          "accept a streaming request every seconds" = 0;
+        };
+      };
+      # NOTE(review): the key is interpolated into a file built by pkgs.writeText,
+      # so it presumably ends up readable in the Nix store -- confirm this is acceptable.
+      configDir."stream.conf" = pkgs.writeText "stream.conf" ''
+        [stream]
+          enabled = yes
+          destination = klingon.federez.net:19999
+          api key = ${cfg.apiKey}
+      '';
+    };
+  };
+}
diff --git a/profiles/netdata.nix b/profiles/netdata.nix
index b3db825..ad51573 100644
--- a/profiles/netdata.nix
+++ b/profiles/netdata.nix
@@ -1,4 +1,95 @@
-{ ... }: {
-  services.netdata.enable = true;
+# Netdata "parent" profile: central instance whose stream.conf accepts one
+# API key section per node that sets `federez.monitoring.apiKey`.
+{ nodes, pkgs, lib, ... }:
+let
+  # Render the stream.conf section that enables one child API key.
+  mkChildNode = apiKey: allowFrom: ''
+    [${apiKey}]
+      enabled = yes
+      default history = 5000
+      default memory mode = dbengine
+      health enabled by default = auto
+      allow from = ${allowFrom}
+  '';
+  # A node is monitorable when it exposes a non-null federez.monitoring.apiKey.
+  isMonitorableChild = s: lib.hasAttrByPath [ "config" "federez" "monitoring" "apiKey" ] s && s.config.federez.monitoring.apiKey != null;
+  filterMonitorableChildren = lib.filterAttrs (_: isMonitorableChild);
+  monitorableChildren = filterMonitorableChildren nodes;
+  # One stream.conf fragment per monitorable node, labelled with the node name.
+  streamingChildren = lib.mapAttrsToList (name: peer: ''
+    # ${name}
+    ${mkChildNode peer.config.federez.monitoring.apiKey "*"}
+  '') monitorableChildren;
+in
+  {
+  # I wish it could be truly reproducible, but it cannot because of the access token secret.
+  environment.etc."netdata/health_alarm_notify.conf".enable = false;
+  environment.etc."netdata/health_alarm_notify.conf".source = pkgs.writeText "health_alarm_notify.conf" ''
+    SEND_MATRIX="YES"
+    MATRIX_HOMESERVER="https://matrix.federez.net"
+    MATRIX_ACCESSTOKEN="XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+    DEFAULT_RECIPIENT_MATRIX="!vdYmGGkFFxIRklSLcO:federez.net"
+  '';
+
+  services.netdata = {
+    enable = true;
+    package = pkgs.netdataCloud;
+    config = {
+      global = {
+        "access log" = "none";
+        "disconnect idle web clients after seconds" = 3600;
+        "enable web responses gzip compression" = "no";
+        "errors to trigger flood protection" = 8000;
+        "dbengine multihost disk space" = 4 * 1024; # 4GiB
+        "page cache size" = 1024; # 1GiB
+      };
+      db = {
+        mode = "dbengine";
+        "update every" = 5;
+        "storage tiers" = 3;
+        "dbengine multihost disk space MB" = 4 * 1024; # 4GiB
+        "dbengine tier 1 multihost disk space MB" = 2 * 1024; # 2GiB
+        "dbengine tier 2 multihost disk space MB" = 1 * 1024; # 1GiB
+      };
+      web = {
+        # "bind to" = "127.0.0.1 0.0.0.0 unix:/run/netdata/netdata.sock";
+        # "allow connections from" = "localhost 127.0.0.1 0.0.0.0";
+        # "allow dashboard from" = "localhost 127.0.0.1 0.0.0.0";
+        # "allow management from" = "localhost 127.0.0.1";
+        "allow streaming from" = "89.234.162.*";
+        "allow connections by dns" = "no";
+        "allow dashboard by dns" = "no";
+        "allow badges by dns" = "no";
+        "allow streaming by dns" = "no";
+        "allow netdata.conf by dns" = "no";
+        "allow management by dns" = "no";
+      };
+      # Section names are given bare, like `global`, `db` and `web` above:
+      # the INI generator adds the surrounding brackets itself.
+      "plugin:timex" = {
+        "update every" = 30;
+        "clock synchronization state" = "yes";
+        "time offset" = "yes";
+      };
+
+    };
+    configDir = {
+      "stream.conf" = pkgs.writeText "stream.conf" ''
+        [stream]
+          enabled = no
+          enable compression = yes
+
+        # From file
+        ${lib.concatStringsSep "\n" streamingChildren}
+      '';
+
+      "go.d.conf" = pkgs.writeText "go.d.conf" (builtins.toJSON {
+        "modules"."systemdunits" = true;
+      });
+    };
+  };
+  networking.firewall.allowedTCPPorts = [ 19999 ];
+  # We are not the child.
+  federez.monitoring.enableChild = false;
 
 }