wip: nixpkgs versions + infra network + monitoring
Signed-off-by: Jeltz <jeltz@federez.net>
This commit is contained in:
parent
01b5a0fe25
commit
a64b34810d
24 changed files with 1363 additions and 513 deletions
|
@ -2,6 +2,36 @@
|
|||
let
|
||||
cfg = config.services.grafana;
|
||||
# Wraps `path` as "$__file{<path>}"; used below for the LDAP bind password and
# the admin/secret-key settings so the secret is referenced by file path
# instead of being inlined. NOTE(review): presumably Grafana's file provider
# syntax - confirm against the Grafana configuration docs.
fileProvider = path: "$__file{${path}}";
|
||||
# The single LDAP server entry that is serialised into ldap.toml.
ldapServer = {
  # Transport: both hosts are given in one space-separated string, SSL on 636.
  host = "ldap.federez.net ldap-ro.federez.net";
  port = 636;
  use_ssl = true;
  start_tls = false;

  # Bind identity; the password is referenced through fileProvider.
  bind_dn = "cn=grafana,ou=service-users,dc=federez,dc=net";
  bind_password =
    fileProvider config.age.secrets.grafana-ldap-bind-password.path;

  # User lookup.
  search_base_dns = [ "cn=Utilisateurs,dc=federez,dc=net" ];
  search_filter = "(&(objectClass=posixAccount)(cn=%s))";
  attributes.email = "mail";

  # Group lookup.
  group_search_base_dns = [ "ou=posix,ou=groups,dc=federez,dc=net" ];
  group_search_filter = "(&(objectClass=posixGroup)(memberUid=%s))";
  group_search_filter_user_attribute = "uid";

  # Role mapping: the sudoldap group gets Admin plus grafana_admin, the "*"
  # entry maps to Viewer.
  group_mappings = [
    {
      group_dn = "cn=sudoldap,ou=posix,ou=groups,dc=federez,dc=net";
      grafana_admin = true;
      org_role = "Admin";
    }
    {
      group_dn = "*";
      org_role = "Viewer";
    }
  ];
};
|
||||
# ldap.toml generated from the server definition above.
ldapConfig =
  let
    tomlFormat = pkgs.formats.toml { };
  in
  tomlFormat.generate "ldap.toml" { servers = [ ldapServer ]; };
|
||||
in {
|
||||
age.secrets = {
|
||||
grafana-admin-password = {
|
||||
|
@ -14,6 +44,11 @@ in {
|
|||
owner = "grafana";
|
||||
group = "grafana";
|
||||
};
|
||||
grafana-ldap-bind-password = {
|
||||
file = ../secrets/grafana-ldap-bind-password.age;
|
||||
owner = "grafana";
|
||||
group = "grafana";
|
||||
};
|
||||
};
|
||||
|
||||
services.grafana = {
|
||||
|
@ -30,6 +65,12 @@ in {
|
|||
admin_password = fileProvider config.age.secrets.grafana-admin-password.path;
|
||||
secret_key = fileProvider config.age.secrets.grafana-secret-key.path;
|
||||
};
|
||||
"auth.ldap" = {
|
||||
enabled = true;
|
||||
allow_sign_up = true;
|
||||
skip_org_role_sync = false;
|
||||
config_file = toString ldapConfig;
|
||||
};
|
||||
};
|
||||
|
||||
declarativePlugins = lib.mkIf config.services.victoriametrics.enable
|
||||
|
@ -42,7 +83,7 @@ in {
|
|||
name = "VictoriaMetrics";
|
||||
type = "victoriametrics-metrics-datasource";
|
||||
uid = "vm";
|
||||
url = "http://localhost:8248";
|
||||
url = "http://localhost:8428";
|
||||
editable = false;
|
||||
jsonData = {
|
||||
isDefault = true;
|
||||
|
|
|
@ -1,217 +0,0 @@
|
|||
{ config, lib, ... }:
|
||||
let
|
||||
inherit (lib) mkOption types;
|
||||
cfg = config.infra-net;
|
||||
# Option set describing a leaf machine: the MAC address used to match its
# existing interface and its numeric identifier.
leafSubmodule = types.submodule {
  options.mac = mkOption {
    type = types.str;
    example = "AA:BB:CC:DD:EE:FF";
    description = ''
      Adresse MAC de l'interface préexistante sur le réseau INFRA.
    '';
  };

  options.id = mkOption {
    type = types.ints.between 1 65535;
    example = 194;
    description = ''
      Identifiant de la machine dans le réseau INFRA.
    '';
  };
};
|
||||
# Public definition of one hub (one value of `hub.all-hubs`): mesh identifier,
# WireGuard public key and public endpoint.
# The submodule constructor lives in `lib.types`; `lib.type` does not exist.
hubDefSubmodule = lib.types.submodule {
  options = {
    hid = mkOption {
      type = types.ints.between 1 255;
      description = ''
        Identifiant du concentrateur sur la maille WireGuard.
      '';
      example = 12;
    };

    public-key = mkOption {
      type = types.str;
      description = ''
        Clé publique WireGuard du concentrateur.
      '';
      example = "LwhiJgtHtYQT4Ug6tgD0RDlUhhNga5tIyiWN2A6dCnk=";
    };

    address = mkOption {
      type = types.str;
      description = ''
        Adresse IP publique du concentrateur.
      '';
      example = "1.2.3.4";
    };

    port = mkOption {
      type = types.port;
      description = ''
        Port WireGuard public du concentrateur.
      '';
      default = 51039;
      example = 51039;
    };
  };
};
|
||||
# Local configuration of a hub: which entry of `all-hubs` it is, where its
# private key lives, and the identity of its bridge on the INFRA network.
hubSubmodule = types.submodule {
  options = {
    # Key of this machine inside `all-hubs`; defaults to the host name.
    name = mkOption {
      type = types.str;
      default = config.networking.hostName;
      description = ''
        Nom d'hôte du concentrateur.
      '';
    };

    all-hubs = mkOption {
      type = types.attrsOf hubDefSubmodule;
      description = ''
        Définitions de l'ensemble des concentrateurs.
      '';
    };

    private-key-path = mkOption {
      type = types.path;
      description = ''
        Chemin vers la clé privée WireGuard du concentrateur.
      '';
    };

    wg-port = mkOption {
      type = types.port;
      default = 51039;
      example = 51039;
      description = ''
        Port d'écoute WireGuard du concentrateur.
      '';
    };

    id = mkOption {
      type = types.ints.between 1 65535;
      example = 194;
      description = ''
        Identifiant de la machine dans le réseau INFRA.
      '';
    };

    mac = mkOption {
      type = types.str;
      example = "AA:BB:CC:DD:EE:FF";
      description = ''
        Adresse MAC de l'interface virtuelle à du concentrateur sur
        le réseau INFRA.
      '';
    };
  };
};
|
||||
# Derives the two INFRA addresses of a machine from its numeric id: the id is
# split into a quotient and a remainder modulo 256, and both parts are written
# in decimal into the IPv6 and the IPv4 address.
mkAddresses = id:
  let
    high = id / 256;
    low = id - high * 256;
    highStr = toString high;
    lowStr = toString low;
  in
  [
    "fd0a:66d3:1c19:42::${highStr}:${lowStr}/64"
    "10.42.${highStr}.${lowStr}/16"
  ];
|
||||
# Address of a hub on the WireGuard mesh: fixed fd0a:66d3:1c19:1000:: prefix
# followed by the hub's `hid`, without a prefix length.
mkHubAddress = hub: "fd0a:66d3:1c19:1000::${toString hub.hid}";
|
||||
# Builds the WireGuard peer entry for one hub definition (a value of
# `hub.all-hubs`).
mkPeer = hub: {
  PublicKey = hub.public-key;
  # `hub.port` is an integer (types.port); Nix does not coerce integers inside
  # string interpolation, so it has to go through toString.
  Endpoint = "${hub.address}:${toString hub.port}";
  # Only the peer's own mesh address is accepted from it.
  AllowedIPs = mkHubAddress hub;
};
|
||||
# VXLAN parameters shared by every hub.
vni = 42;
vxlanPort = 4789;

# This machine's own entry in `all-hubs`.
selfHub = cfg.hub.all-hubs.${cfg.hub.name};

# Every other entry of `all-hubs`, still as an attribute set keyed by name.
otherHubs =
  let
    isOther = hubName: _: hubName != cfg.hub.name;
  in
  lib.filterAttrs isOther cfg.hub.all-hubs;
|
||||
# Forwarding-database entry with the all-zero MAC address, pointing at the
# given hub's mesh address for the module's VNI.
mkBridgeFDB = hub:
  let
    remote = mkHubAddress hub;
  in
  {
    VNI = vni;
    Destination = remote;
    MACAddress = "00:00:00:00:00:00";
  };
|
||||
in {
|
||||
# Public options: a machine is configured either as a leaf, as a hub, or as
# neither (both default to null).
options.infra-net.leaf = mkOption {
  type = types.nullOr leafSubmodule;
  default = null;
  description = ''
    Configuration de l'interface d'une feuille du réseau INFRA.
  '';
};

options.infra-net.hub = mkOption {
  type = types.nullOr hubSubmodule;
  default = null;
  description = ''
    Configuration des interfaces d'un concentrateur du réseau INFRA.
  '';
};
|
||||
|
||||
# Implementation: emits the systemd-networkd units for a hub and/or a leaf,
# depending on which of `infra-net.hub` / `infra-net.leaf` is non-null.
config =
  let
    # `otherHubs` is an attribute set keyed by hub name, while `map` only
    # accepts lists: take its values before mapping over it.
    peerHubs = lib.attrValues otherHubs;

    hubNetwork = {
      # Virtual devices belong under `netdevs` (netdevConfig, wireguardConfig,
      # vxlanConfig are netdev settings); `links` is for .link files, which
      # only rename/match existing interfaces.
      netdevs = {
        "10-wg-infra" = {
          netdevConfig = {
            Name = "wg-infra";
            Kind = "wireguard";
          };
          wireguardConfig = {
            ListenPort = cfg.hub.wg-port;
            # Names the credential declared through LoadCredential below.
            PrivateKey = "@wg-infra-key";
          };
          wireguardPeers = map mkPeer peerHubs;
        };
        "10-vxl-infra" = {
          netdevConfig = {
            Name = "vxl-infra";
            Kind = "vxlan";
          };
          vxlanConfig = {
            Local = mkHubAddress selfHub;
            VNI = vni;
            MacLearning = true;
            DestinationPort = vxlanPort;
          };
        };
        "10-br-infra".netdevConfig = {
          Name = "br-infra";
          Kind = "bridge";
          MACAddress = cfg.hub.mac;
        };
      };

      networks = {
        # The WireGuard interface carries the mesh address and the VXLAN.
        "10-wg-infra" = {
          matchConfig.Name = "wg-infra";
          networkConfig = {
            Address = "${mkHubAddress selfHub}/64";
            VXLAN = "vxl-infra";
          };
        };
        # The VXLAN is enslaved to the bridge, with one FDB entry per peer.
        "10-vxl-infra" = {
          matchConfig.Name = "vxl-infra";
          networkConfig = {
            LinkLocalAddressing = false;
            Bridge = "br-infra";
          };
          bridgeFDBs = map mkBridgeFDB peerHubs;
        };
        # The bridge holds the hub's INFRA addresses.
        "10-br-infra" = {
          matchConfig.Name = "br-infra";
          address = mkAddresses cfg.hub.id;
        };
      };
    };

    leafNetwork = {
      # Rename the interface with the configured MAC address to "infra".
      links."10-infra" = {
        matchConfig.MACAddress = cfg.leaf.mac;
        linkConfig.Name = "infra";
      };
      networks."10-infra" = {
        matchConfig.Name = "infra";
        address = mkAddresses cfg.leaf.id;
      };
    };
  in
  {
    systemd.network = lib.mkMerge [
      (lib.mkIf (cfg.hub != null) hubNetwork)
      (lib.mkIf (cfg.leaf != null) leafNetwork)
    ];

    # Hand the WireGuard private key to networkd as the "wg-infra-key"
    # credential (hubs only).
    systemd.services.systemd-networkd.serviceConfig.LoadCredential =
      lib.mkIf (cfg.hub != null)
        [ "wg-infra-key:${cfg.hub.private-key-path}" ];
  };
|
||||
}
|
167
profiles/infra.nix
Normal file
167
profiles/infra.nix
Normal file
|
@ -0,0 +1,167 @@
|
|||
{ config, lib, pkgs, network, name, ... }:
|
||||
let
|
||||
# Local option values of this module.
cfg = config.infra;

# A machine is a hub as soon as `infra.hub` is set.
isHub = cfg.hub != null;

# Entries for the current machine (`name`) in the shared network description.
node = network.infra.nodes.${name};
hub = network.infra.hubs.${name};
|
||||
# IPv4 then IPv6 address of this node, each suffixed with the node prefix
# length taken from the shared network description.
address =
  let
    prefixLength = network.infra.cidr.nodes;
    withPrefix = ip: len: "${ip}/${toString len}";
  in
  [
    (withPrefix node.ipv4 prefixLength.ipv4)
    (withPrefix node.ipv6 prefixLength.ipv6)
  ];
|
||||
# List of every hub definition except the one named after this machine.
otherHubs =
  lib.attrValues
    (lib.filterAttrs (hubName: _: hubName != name) network.infra.hubs);
|
||||
# Forwarding-database entry with the all-zero MAC address, pointing at the
# given hub's IPv6 address for the shared VNI.
mkBridgeFDB = hub:
  let
    inherit (network.infra.vxlan) vni;
  in
  {
    VNI = vni;
    Destination = hub.ipv6;
    MACAddress = "00:00:00:00:00:00";
  };
|
||||
# WireGuard peer entry for another hub of the mesh. Only the peer's own
# address is allowed, and a keepalive is sent every 25 seconds.
mkPeer = hub: {
  PublicKey = hub.publicKey;
  Endpoint = hub.endpoint;
  PersistentKeepalive = 25;
  AllowedIPs = [ "${hub.ipv6}" ];
};
|
||||
# systemd-networkd configuration of a hub: a WireGuard interface towards the
# other hubs, a VXLAN on top of it, and a bridge carrying the node addresses.
hubNetwork =
  let
    inherit (network.infra) vxlan;
    hubPrefixLength = toString network.infra.cidr.hubs.ipv6;
  in
  {
    netdevs = {
      "10-br-infra".netdevConfig = {
        Kind = "bridge";
        Name = "br-infra";
      };

      "10-vxl-infra" = {
        netdevConfig = {
          Kind = "vxlan";
          Name = "vxl-infra";
        };
        vxlanConfig = {
          VNI = vxlan.vni;
          DestinationPort = vxlan.port;
          Local = hub.ipv6;
          MacLearning = true;
        };
      };

      "10-wg-infra" = {
        netdevConfig = {
          Kind = "wireguard";
          Name = "wg-infra";
        };
        wireguardConfig = {
          # Names the credential declared through LoadCredential in `config`.
          PrivateKey = "@wg-infra-key";
          ListenPort = cfg.hub.wireguardPort;
        };
        wireguardPeers = map mkPeer otherHubs;
      };
    };

    networks = {
      # The bridge holds the node addresses and the node MAC address.
      "10-br-infra" = {
        matchConfig.Name = "br-infra";
        linkConfig.MACAddress = node.mac;
        inherit address;
      };

      # The VXLAN is enslaved to the bridge, with one FDB entry per other hub.
      "10-vxl-infra" = {
        matchConfig.Name = "vxl-infra";
        networkConfig = {
          Bridge = "br-infra";
          LinkLocalAddressing = false;
        };
        bridgeFDBs = map mkBridgeFDB otherHubs;
      };

      # The WireGuard interface carries the hub address and the VXLAN.
      "10-wg-infra" = {
        matchConfig.Name = "wg-infra";
        networkConfig = {
          Address = "${hub.ipv6}/${hubPrefixLength}";
          VXLAN = "vxl-infra";
        };
      };
    };
  };
|
||||
# systemd-networkd configuration of a leaf: the interface whose MAC address
# matches the node entry is renamed to "infra" and given the node addresses.
leafNetwork = {
  links = {
    "10-infra" = {
      linkConfig.Name = "infra";
      matchConfig.MACAddress = node.mac;
    };
  };
  networks = {
    "10-infra" = {
      matchConfig.Name = "infra";
      inherit address;
    };
  };
};
|
||||
# Per-interface firewall rules of a hub: the VXLAN port is opened on the
# WireGuard interface, the user-declared ports on the bridge.
hubFirewall = {
  br-infra = {
    inherit (cfg.firewall) allowedTCPPorts allowedUDPPorts;
  };
  wg-infra.allowedUDPPorts = [ network.infra.vxlan.port ];
};

# Per-interface firewall rules of a leaf: the user-declared ports on "infra".
leafFirewall = {
  infra = {
    inherit (cfg.firewall) allowedTCPPorts allowedUDPPorts;
  };
};
|
||||
in {
|
||||
# Public options of the INFRA network profile.
options.infra =
  let
    inherit (lib) mkEnableOption mkOption types;

    # Hub-only settings; leaving `infra.hub` at null makes the machine a leaf.
    hubOptions = types.submodule {
      options.privateKeyPath = mkOption {
        type = types.path;
        description = ''
          Chemin vers la clé privée WireGuard du concentrateur.
        '';
      };
      options.wireguardPort = mkOption {
        type = types.port;
        default = 51039;
        example = 51039;
        description = ''
          Port d'écoute WireGuard du concentrateur.
        '';
      };
    };

    portList = types.listOf types.port;
  in
  {
    enabled = mkEnableOption "Réseau INFRA";

    hub = mkOption {
      type = types.nullOr hubOptions;
      default = null;
      description = ''
        Configuration d'un concentrateur du réseau INFRA.
      '';
    };

    # Ports opened on the INFRA-facing interface only.
    firewall.allowedTCPPorts = mkOption {
      type = portList;
      default = [ ];
      example = [ 443 9100 ];
      description = ''
        Ports TCP autorisés sur le réseau INFRA.
      '';
    };

    firewall.allowedUDPPorts = mkOption {
      type = portList;
      default = [ ];
      example = [ 53 ];
      description = ''
        Ports UDP autorisés sur le réseau INFRA.
      '';
    };
  };
|
||||
|
||||
# Everything below only applies when `infra.enabled` is set; the hub and leaf
# variants are mutually exclusive.
config = lib.mkIf cfg.enabled {
  systemd = {
    network = lib.mkMerge [
      (lib.mkIf isHub hubNetwork)
      (lib.mkIf (!isHub) leafNetwork)
    ];

    # Hubs pass their WireGuard private key to networkd as the
    # "wg-infra-key" credential.
    services.systemd-networkd.serviceConfig.LoadCredential =
      lib.mkIf isHub [ "wg-infra-key:${cfg.hub.privateKeyPath}" ];
  };

  networking.firewall.interfaces = lib.mkMerge [
    (lib.mkIf isHub hubFirewall)
    (lib.mkIf (!isHub) leafFirewall)
  ];

  # WireGuard tooling is only installed on hubs.
  environment.systemPackages = lib.mkIf isHub [ pkgs.wireguard-tools ];
};
|
||||
}
|
112
profiles/monitoring/default.nix
Normal file
112
profiles/monitoring/default.nix
Normal file
|
@ -0,0 +1,112 @@
|
|||
{ lib, config, infra, ... }:
|
||||
let
|
||||
# Builds one scrape job. `targets` are bare names: each one is copied to
# `__param_target`, exposed as the `instance` label, and the scraped address
# is rewritten to "<name>.infra.federez.net:<port>".
mkScrapeConfig = name: path: port: targets:
  let
    relabel = from: to: {
      source_labels = [ from ];
      target_label = to;
    };
  in
  {
    job_name = name;
    metrics_path = path;
    static_configs = [ { inherit targets; } ];
    relabel_configs = [
      (relabel "__address__" "__param_target")
      (relabel "__param_target" "instance")
      (relabel "__param_target" "__address__" // {
        replacement = "$1.infra.federez.net:${toString port}";
      })
    ];
  };
|
||||
# Ports of the node exporter targets and of the local VictoriaMetrics.
nodePort = 9100;
vmPort = 8428;

# "node" job: one target per attribute name of `infra.nodes`.
nodesConfig =
  let
    nodeNames = lib.mapAttrsToList (nodeName: _: nodeName) infra.nodes;
  in
  mkScrapeConfig "node" "/metrics" nodePort nodeNames;
|
||||
# Label sets attached to alert rules and used by the inhibit rule.
critical.severity = "critical";
warning.severity = "warning";

# Imports a rules file (a function from { critical, warning } to an attribute
# set of rules) and turns it into a list, storing each attribute name in the
# rule's `alert` field.
importRules = path:
  let
    rulesByName = import path { inherit critical warning; };
    toRule = alertName: rule: rule // { alert = alertName; };
  in
  lib.attrsets.mapAttrsToList toRule rulesByName;
|
||||
in {
|
||||
imports = [ ../../modules/alertbot.nix ];

# Password of the Matrix account used by the alert bot.
age.secrets.alertbot-matrix-password.file =
  ../../secrets/alertbot-matrix-password.age;

# Bot receiving Alertmanager webhooks on port 8081 (see the "webhook"
# receiver, which reads `listenPort`).
services.alertbot = {
  enable = true;
  listenPort = 8081;
  matrix.homeserver = "https://matrix.federez.net";
  matrix.user = "@alertbot:federez.net";
  matrix.roomId = "!bVyCrycmkkLXdQRquJ:federez.net";
  matrix.passwordFile = config.age.secrets.alertbot-matrix-password.path;
};
|
||||
|
||||
# Metrics storage, listening on localhost only and scraping the "node" job.
services.victoriametrics = {
  enable = true;
  listenAddress = "localhost:${toString vmPort}";
  extraOptions = [ "-enableTCP6" ];
  prometheusConfig.scrape_configs = [ nodesConfig ];
};
|
||||
|
||||
# Rule evaluation: reads from and writes back to the local VictoriaMetrics,
# and notifies the local Alertmanager.
services.vmalert =
  let
    vmUrl = "http://localhost:${toString vmPort}";
    amUrl =
      "http://localhost:${toString config.services.prometheus.alertmanager.port}";

    # One rule group per rules file.
    ruleGroup = name: file: {
      inherit name;
      rules = importRules file;
    };
  in
  {
    enable = true;

    rules.groups = [
      (ruleGroup "common" ./rules/common.nix)
      (ruleGroup "node" ./rules/node.nix)
    ];

    settings = {
      "datasource.url" = vmUrl;
      "remoteRead.url" = vmUrl;
      "remoteWrite.url" = vmUrl;
      "notifier.url" = [ amUrl ];
    };
  };
|
||||
|
||||
# Alertmanager: every alert goes to the alert bot webhook; an alert matching
# the `critical` labels inhibits one matching `warning` with the same
# alertname and instance.
services.prometheus.alertmanager =
  let
    groupingLabels = [ "alertname" "instance" ];
    botPort = config.services.alertbot.listenPort;
    webhook = {
      url = "http://localhost:${toString botPort}/webhook";
      send_resolved = true;
    };
  in
  {
    enable = true;

    configuration.route = {
      receiver = "webhook";
      group_by = groupingLabels;
      group_wait = "30s";
      group_interval = "30s";
      repeat_interval = "24h";
    };

    configuration.inhibit_rules = [
      {
        source_match = critical;
        target_match = warning;
        equal = groupingLabels;
      }
    ];

    configuration.receivers = [
      {
        name = "webhook";
        webhook_configs = [ webhook ];
      }
    ];
  };
|
||||
}
|
9
profiles/monitoring/rules/common.nix
Normal file
9
profiles/monitoring/rules/common.nix
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Rules that apply to every scrape job. The attribute name becomes the alert
# name when the file is imported.
{ critical, ... }:

{
  # A scrape target has reported `up == 0` for three minutes.
  CommonTargetMissing = {
    labels = critical;
    for = "3m";
    expr = "up == 0";
    annotations = {
      Job = "{{ $labels.job }}";
    };
  };
}
|
287
profiles/monitoring/rules/node.nix
Normal file
287
profiles/monitoring/rules/node.nix
Normal file
|
@ -0,0 +1,287 @@
|
|||
{ critical, warning, ... }:
|
||||
{
|
||||
# Memory

# Less than 10% of the memory is available.
NodeOutOfMemory = {
  labels = critical;
  for = "3m";
  expr = ''
    (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) < 0.1
  '';
  annotations = {
    Available = "{{ $value | humanizePercentage }}";
  };
};

# More than 1000 major page faults per second over five minutes.
NodeUnderMemoryPressure = {
  labels = critical;
  for = "0m";
  expr = "rate(node_vmstat_pgmajfault[5m]) > 1000";
  annotations = {
    Pressure = "{{ $value | humanize }}";
  };
};

# More than half of the swap is in use.
NodeSwapIsFillingUp = {
  labels = critical;
  for = "1m";
  expr = ''
    (1 - (node_memory_SwapFree_bytes
    / node_memory_SwapTotal_bytes)) > 0.5
  '';
  annotations = {
    UsedSwap = "{{ $value | humanizePercentage }}";
  };
};

# The OOM-kill counter increased during the last minute.
NodeOomKillDetected = {
  labels = critical;
  for = "0m";
  expr = "increase(node_vmstat_oom_kill[1m]) > 0";
};
|
||||
|
||||
# CPU

# Overall CPU usage above 80% for ten minutes.
# The per-mode rates are first averaged across CPUs, then summed across the
# non-idle modes. Averaging over every (cpu, mode) series at once would divide
# the usage by the number of non-idle modes, so the threshold could never be
# reached.
NodeCpuUsage = {
  expr = ''
    sum by (instance) (
    avg by (instance, mode)
    (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > 0.8
  '';
  for = "10m";
  labels = warning;
  annotations.AverageUsage = "{{ $value | humanizePercentage }}";
};

# More than 10% of CPU time stolen (single mode, so a plain average across
# CPUs is correct here).
NodeCpuStealNoisyNeighbor = {
  expr = ''
    avg by (instance) (
    rate(node_cpu_seconds_total{mode="steal"}[2m])
    ) > 0.1
  '';
  for = "10m";
  labels = warning;
  annotations.Steal = "{{ $value | humanizePercentage }}";
};
|
||||
|
||||
# Network

# Receive throughput above 80% of the reported link speed.
NodeLinkHighUsageIn = {
  expr = ''
    (rate(node_network_receive_bytes_total[5m])
    / on(instance, device) node_network_speed_bytes) > .80
  '';
  for = "3m";
  labels = warning;
  annotations = {
    Usage = "{{ $value | humanizePercentage }}";
    Device = "{{ $labels.device }}";
  };
};

# Transmit throughput above 80% of the reported link speed.
NodeLinkHighUsageOut = {
  expr = ''
    (rate(node_network_transmit_bytes_total[5m])
    / on(instance, device) node_network_speed_bytes) > .80
  '';
  for = "3m";
  labels = warning;
  annotations = {
    Usage = "{{ $value | humanizePercentage }}";
    Device = "{{ $labels.device }}";
  };
};

# Connection tracking table more than 80% full.
NodeConntrackLimit = {
  expr = ''
    (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.8
  '';
  for = "5m";
  labels = warning;
  annotations.Filled = "{{ $value | humanizePercentage }}";
};

# More than 1% of received packets are errors.
NodeNetworkReceiveErrors = {
  expr = ''
    rate(node_network_receive_errs_total[2m])
    / rate(node_network_receive_packets_total[2m]) > 0.01
  '';
  for = "2m";
  labels = warning;
  annotations = {
    Errors = "{{ $value | humanizePercentage }}";
    Device = "{{ $labels.device }}";
  };
};

# More than 1% of transmitted packets are errors.
NodeNetworkTransmitErrors = {
  expr = ''
    rate(node_network_transmit_errs_total[2m])
    / rate(node_network_transmit_packets_total[2m]) > 0.01
  '';
  for = "2m";
  labels = warning;
  annotations = {
    Errors = "{{ $value | humanizePercentage }}";
    Device = "{{ $labels.device }}";
  };
};

# The number of active bond members differs from the configured one.
NodeNetworkBondDegraded = {
  expr = "node_bonding_active - node_bonding_slaves != 0";
  for = "2m";
  labels = warning;
  # The bonding metrics identify the bond through the `master` label; they
  # have no `device` label, which would render as an empty annotation.
  annotations.Device = "{{ $labels.master }}";
};
|
||||
|
||||
# Temperature

# A sensor is hotter than its own maximum, capped at 79 °C.
# clamp_max takes the vector first and the scalar limit second; with the
# arguments swapped the expression is not a valid query.
NodePhysicalComponentTooHot = {
  expr = ''
    node_hwmon_temp_celsius > clamp_max(node_hwmon_temp_max_celsius, 79)
  '';
  for = "0m";
  labels = critical;
  annotations = {
    Temperature = "{{ $value | humanize }} °C";
    Chip = "{{ $labels.chip }}";
    Sensor = "{{ $labels.sensor }}";
  };
};

# A sensor reports its critical-temperature alarm.
NodeNodeOvertemperatureAlarm = {
  expr = "node_hwmon_temp_crit_alarm_celsius == 1";
  for = "0m";
  labels = critical;
  annotations = {
    Chip = "{{ $labels.chip }}";
    Sensor = "{{ $labels.sensor }}";
  };
};
|
||||
|
||||
# Storage and disks

# A software RAID array is in the "inactive" state.
NodeRaidArrayGotInactive = {
  expr = ''
    node_md_state{state="inactive"} > 0
  '';
  for = "0m";
  labels = critical;
  annotations = {
    Device = "{{ $labels.device }}";
  };
};

# A software RAID array has at least one failed disk.
NodeRaidDiskFailure = {
  expr = ''
    node_md_disks{state="failed"} > 0
  '';
  for = "0m";
  labels = critical;
  annotations = {
    # node_md_disks names the array in the `device` label, like
    # node_md_state; there is no `md_device` label.
    Device = "{{ $labels.device }}";
  };
};

# Less than 10% of free space on a filesystem that is not read-only.
NodeOutOfDiskSpace = {
  expr = ''
    (node_filesystem_free_bytes / node_filesystem_size_bytes < 0.1)
    and on (instance, device, mountpoint) (node_filesystem_readonly) == 0
  '';
  for = "5m";
  labels = critical;
  annotations = {
    Mountpoint = "{{ $labels.mountpoint }}";
    FreeSpace = "{{ $value | humanizePercentage }}";
  };
};

# Less than 10% of free inodes.
NodeOutOfInodes = {
  expr = "node_filesystem_files_free / node_filesystem_files < 0.1";
  for = "3m";
  labels = critical;
  annotations = {
    Mountpoint = "{{ $labels.mountpoint }}";
    FreeInodes = "{{ $value | humanizePercentage }}";
  };
};

# The SMART health metric of a disk is below 1.
NodeUnhealthyDisk = {
  expr = "smartmon_device_smart_healthy < 1";
  for = "10m";
  labels = critical;
  annotations.Disk = "{{ $labels.disk }}";
};

# A ZFS pool reports a state other than "online".
NodeZfsWrongState = {
  expr = ''
    node_zfs_zpool_state{state!="online"} > 0
  '';
  for = "5m";
  labels = critical;
  annotations = {
    State = "{{ $labels.state }}";
    ZPool = "{{ $labels.zpool }}";
  };
};
|
||||
|
||||
# Clock

# The clock offset exceeds 50 ms in either direction and is not shrinking.
NodeClockSkew = {
  labels = warning;
  for = "2m";
  expr = ''
    (node_timex_offset_seconds > 0.05
    and deriv(node_timex_offset_seconds[5m]) >= 0)
    or (node_timex_offset_seconds < -0.05
    and deriv(node_timex_offset_seconds[5m]) <= 0)
  '';
};

# The clock has been unsynchronised for a minute and the maximum error has
# reached 16 seconds.
NodeClockNotSynchronising = {
  labels = warning;
  for = "2m";
  expr = ''
    min_over_time(node_timex_sync_status[1m]) == 0
    and node_timex_maxerror_seconds >= 16
  '';
};
|
||||
|
||||
# Misc

# Five-minute load above one per CPU.
NodeLoad5Usage = {
  labels = warning;
  for = "1m";
  expr = ''
    node_load5 / (
    count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1.0
  '';
  annotations = {
    Load5PerCore = "{{ $value | humanize }}";
  };
};

# A systemd unit has been in the "failed" state for five minutes.
NodeSystemdServiceFailed = {
  labels = warning;
  for = "5m";
  expr = ''
    node_systemd_unit_state{state="failed"} == 1
  '';
  annotations = {
    Service = "{{ $labels.name }}";
  };
};

# The reboot-required metric is set.
NodeRequiresReboot = {
  labels = warning;
  for = "5m";
  expr = "node_reboot_required > 0";
};

# Correctable memory errors were counted during the last minute.
NodeEdacCorrectableErrorsDetected = {
  labels = warning;
  for = "0m";
  expr = ''
    increase(node_edac_correctable_errors_total[1m]) > 0
  '';
  annotations = {
    CorrectedErrors = "{{ $value }}";
  };
};

# Uncorrectable memory errors were counted during the last minute.
NodeEdacUncorrectableErrorsDetected = {
  labels = critical;
  for = "0m";
  expr = ''
    increase(node_edac_uncorrectable_errors_total[1m]) > 0
  '';
  annotations = {
    DetectedErrors = "{{ $value }}";
  };
};
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
{ nodes, pkgs, lib, ... }:
|
||||
let
|
||||
# Renders one stream.conf section for a child: the section header is the
# child's API key, and `allowFrom` fills the "allow from" line. The trailing
# empty element produces the final newline.
mkChildNode = apiKey: allowFrom:
  lib.concatStringsSep "\n" [
    "[${apiKey}]"
    "enabled = yes"
    "default history = 5000"
    "default memory mode = dbengine"
    "health enabled by default = auto"
    "allow from = ${allowFrom}"
    ""
  ];
|
||||
# A node is a monitorable child when it defines a non-null
# config.federez.monitoring.apiKey.
isMonitorableChild = s:
  let
    keyPath = [ "config" "federez" "monitoring" "apiKey" ];
    hasKey = lib.hasAttrByPath keyPath s;
  in
  hasKey && s.config.federez.monitoring.apiKey != null;

filterMonitorableChildren = lib.filterAttrs (_: isMonitorableChild);

monitorableChildren = filterMonitorableChildren nodes;

# One stream.conf fragment per monitorable child, preceded by a comment line
# with the node name; every child is allowed from "*".
streamingChildren =
  let
    renderChild = name: peer: ''
      # ${name}
      ${mkChildNode peer.config.federez.monitoring.apiKey "*"}
    '';
  in
  lib.mapAttrsToList renderChild monitorableChildren;
|
||||
in
|
||||
{
|
||||
# I wish it could be truly reproducible, but it cannot because of the access
# token secret: the generated file is declared but kept disabled.
environment.etc."netdata/health_alarm_notify.conf" = {
  enable = false;
  source = pkgs.writeText "health_alarm_notify.conf" ''
    SEND_MATRIX="YES"
    MATRIX_HOMESERVER="https://matrix.federez.net"
    MATRIX_ACCESSTOKEN="XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    DEFAULT_RECIPIENT_MATRIX="!vdYmGGkFFxIRklSLcO:federez.net"
  '';
};
|
||||
|
||||
# Netdata parent: stores the metrics streamed by the children declared in
# stream.conf.
services.netdata = {
  enable = true;
  package = pkgs.netdataCloud;

  # Each top-level attribute below is one section of netdata.conf.
  config = {
    global = {
      "access log" = "none";
      "disconnect idle web clients after seconds" = 3600;
      "enable web responses gzip compression" = "no";
      "errors to trigger flood protection" = 8000;
      "dbengine multihost disk space" = 4 * 1024; # 4GiB
      "page cache size" = 1024; # 1GiB
    };

    db = {
      mode = "dbengine";
      "update every" = 5;
      "storage tiers" = 3;
      "dbengine multihost disk space MB" = 4 * 1024; # 4GiB
      "dbengine tier 1 multihost disk space MB" = 2 * 1024; # 2GiB
      "dbengine tier 2 multihost disk space MB" = 1 * 1024; # 1GiB
    };

    web = {
      # "bind to" = "127.0.0.1 0.0.0.0 unix:/run/netdata/netdata.sock";
      # "allow connections from" = "localhost 127.0.0.1 0.0.0.0";
      # "allow dashboard from" = "localhost 127.0.0.1 0.0.0.0";
      # "allow management from" = "localhost 127.0.0.1";
      "allow streaming from" = "89.234.162.*";
      "allow connections by dns" = "no";
      "allow dashboard by dns" = "no";
      "allow badges by dns" = "no";
      "allow streaming by dns" = "no";
      "allow netdata.conf by dns" = "no";
      "allow management by dns" = "no";
    };

    # The attribute name is the bare section name: the brackets are added
    # when the file is generated, so writing them here would double them.
    "plugin:timex" = {
      "update every" = 30;
      "clock synchronization state" = "yes";
      "time offset" = "yes";
    };
  };

  configDir = {
    # Streaming to a parent is disabled; the per-child sections follow.
    "stream.conf" = pkgs.writeText "stream.conf" ''
      [stream]
      enabled = no
      enable compression = yes

      # From file
      ${lib.concatStringsSep "\n" streamingChildren}
    '';

    "go.d.conf" = pkgs.writeText "go.d.conf" (builtins.toJSON {
      modules.systemdunits = true;
    });
  };
};
|
||||
|
||||
# TCP port 19999 is opened in the firewall.
networking.firewall = {
  allowedTCPPorts = [ 19999 ];
};

# This machine is the parent, not a child.
federez.monitoring = {
  enableChild = false;
};
|
||||
}
|
10
profiles/prometheus-node-exporter.nix
Normal file
10
profiles/prometheus-node-exporter.nix
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Enables the Prometheus node exporter and opens its port on the INFRA
# network only (through the `infra.firewall` options).
{ network, config, name, ... }:

let
  # Port the exporter is configured to listen on.
  port = config.services.prometheus.exporters.node.port;
in
{
  infra.firewall.allowedTCPPorts = [ port ];

  services.prometheus.exporters.node.enable = true;
}
|
|
@ -1,16 +0,0 @@
|
|||
# VictoriaMetrics with a single static "node" scrape job.
{ ... }:

let
  # Hosts scraped on port 9100 under the federez.net domain.
  scrapedHosts = [ "dodecagon" "saigon" ];

  nodesConfig = {
    job_name = "node";
    static_configs = [
      { targets = map (host: "${host}.federez.net:9100") scrapedHosts; }
    ];
  };
in
{
  services.victoriametrics.enable = true;
  services.victoriametrics.prometheusConfig.scrape_configs = [ nodesConfig ];
}
|
|
@ -1,19 +1,9 @@
|
|||
{ config, pkgs, ... }:
|
||||
{
|
||||
age.secrets = {
|
||||
vogon-wg-infra-key = {
|
||||
file = ../secrets/vogon-wg-infra-key.age;
|
||||
owner = "root";
|
||||
group = "root";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.systemd-networkd.serviceConfig.LoadCredential = [
|
||||
"wg-infra-key:${config.age.secrets.vogon-wg-infra-key.path}"
|
||||
imports = [
|
||||
./infra.nix
|
||||
];
|
||||
|
||||
environment.systemPackages = [ pkgs.wireguard-tools ];
|
||||
|
||||
# FIXME I suck. I didn't manage to configure a working ZFS rootfs with disko
|
||||
# It was 1 AM, and the server had to be up and running quickly, so I
|
||||
# partitioned the server manually
|
||||
|
@ -64,140 +54,60 @@
|
|||
"sr_mod"
|
||||
];
|
||||
|
||||
# FIXME
|
||||
networking.firewall.trustedInterfaces = [ "wg-infra" "vxl-infra" "br-infra" ];
|
||||
|
||||
systemd.network.links = {
|
||||
"10-phy1" = {
|
||||
matchConfig.MACAddress = "18:66:da:75:da:04";
|
||||
linkConfig.Name = "phy1";
|
||||
systemd.network = {
|
||||
links = {
|
||||
"10-phy1" = {
|
||||
matchConfig.MACAddress = "18:66:da:75:da:04";
|
||||
linkConfig.Name = "phy1";
|
||||
};
|
||||
"10-phy2" = {
|
||||
matchConfig.MACAddress = "18:66:da:75:da:05";
|
||||
linkConfig.Name = "phy2";
|
||||
};
|
||||
};
|
||||
"10-phy2" = {
|
||||
matchConfig.MACAddress = "18:66:da:75:da:05";
|
||||
linkConfig.Name = "phy2";
|
||||
netdevs = {
|
||||
"10-wan".netdevConfig = {
|
||||
Name = "wan";
|
||||
Kind = "bridge";
|
||||
};
|
||||
"10-bond" = {
|
||||
netdevConfig = {
|
||||
Name = "bond";
|
||||
Kind = "bond";
|
||||
};
|
||||
bondConfig.Mode = "802.3ad";
|
||||
};
|
||||
};
|
||||
networks = {
|
||||
"10-phy1" = {
|
||||
matchConfig.Name = "phy1";
|
||||
networkConfig.Bond = "bond";
|
||||
};
|
||||
"10-phy2" = {
|
||||
matchConfig.Name = "phy2";
|
||||
networkConfig.Bond = "bond";
|
||||
};
|
||||
"10-bond" = {
|
||||
matchConfig.Name = "bond";
|
||||
networkConfig.Bridge = "wan";
|
||||
};
|
||||
"10-wan" = {
|
||||
matchConfig.Name = "wan";
|
||||
address = [ "193.54.193.161/28" ];
|
||||
routes = [ { Gateway = "193.54.193.174"; } ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
systemd.network.netdevs = {
|
||||
"10-wan".netdevConfig = {
|
||||
Name = "wan";
|
||||
Kind = "bridge";
|
||||
};
|
||||
"10-bond" = {
|
||||
netdevConfig = {
|
||||
Name = "bond";
|
||||
Kind = "bond";
|
||||
};
|
||||
bondConfig.Mode = "802.3ad";
|
||||
};
|
||||
"10-br-infra".netdevConfig = {
|
||||
Name = "br-infra";
|
||||
Kind = "bridge";
|
||||
};
|
||||
"10-vxl-infra" = {
|
||||
netdevConfig = {
|
||||
Name = "vxl-infra";
|
||||
Kind = "vxlan";
|
||||
};
|
||||
vxlanConfig = {
|
||||
Local = "fd0a:66d3:1c19:1000::1";
|
||||
VNI = 42;
|
||||
MacLearning = true;
|
||||
DestinationPort = 4789;
|
||||
};
|
||||
};
|
||||
"10-wg-infra" = {
|
||||
netdevConfig = {
|
||||
Name = "wg-infra";
|
||||
Kind = "wireguard";
|
||||
};
|
||||
wireguardConfig = {
|
||||
ListenPort = 51039;
|
||||
PrivateKey = "@wg-infra-key";
|
||||
};
|
||||
wireguardPeers = [
|
||||
{
|
||||
PublicKey = "JfTsY3+jPTDgLDrECoSvoYs+6+GpjII0ookjhFhd5SY=";
|
||||
Endpoint = "89.234.162.224:51039";
|
||||
AllowedIPs = [ "fd0a:66d3:1c19:1000::2" ];
|
||||
PersistentKeepalive = 10;
|
||||
}
|
||||
{
|
||||
PublicKey = "nOeLgmE1U6nY3UNxltQKwlID9lD7fvpEwij2XUvEGgg=";
|
||||
Endpoint = "137.194.12.129:51039";
|
||||
AllowedIPs = [ "fd0a:66d3:1c19:1000::3" ];
|
||||
PersistentKeepalive = 10;
|
||||
}
|
||||
{
|
||||
PublicKey = "9pGyE4+CQl+f8sFJ/Mkvp14yxDQJ0SJmGnher5Tgzjc=";
|
||||
Endpoint = "193.48.225.201:51039";
|
||||
AllowedIPs = [ "fd0a:66d3:1c19:1000::4" ];
|
||||
PersistentKeepalive = 10;
|
||||
}
|
||||
];
|
||||
age.secrets = {
|
||||
vogon-wg-infra-key = {
|
||||
file = ../secrets/vogon-wg-infra-key.age;
|
||||
owner = "root";
|
||||
group = "root";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.network.networks = {
|
||||
"10-phy1" = {
|
||||
matchConfig.Name = "phy1";
|
||||
networkConfig.Bond = "bond";
|
||||
};
|
||||
"10-phy2" = {
|
||||
matchConfig.Name = "phy2";
|
||||
networkConfig.Bond = "bond";
|
||||
};
|
||||
"10-bond" = {
|
||||
matchConfig.Name = "bond";
|
||||
networkConfig.Bridge = "wan";
|
||||
};
|
||||
"10-wan" = {
|
||||
matchConfig.Name = "wan";
|
||||
address = [ "193.54.193.161/28" ];
|
||||
routes = [
|
||||
{
|
||||
Gateway = "193.54.193.174";
|
||||
}
|
||||
];
|
||||
};
|
||||
"10-br-infra" = {
|
||||
matchConfig.Name = "br-infra";
|
||||
linkConfig.MACAddress = "9E:D8:78:A1:CE:22";
|
||||
address = [
|
||||
"fd0a:66d3:1c19:42::1/64"
|
||||
"10.42.0.1/16"
|
||||
];
|
||||
};
|
||||
"10-vxl-infra" = {
|
||||
matchConfig.Name = "vxl-infra";
|
||||
networkConfig = {
|
||||
Bridge = "br-infra";
|
||||
LinkLocalAddressing = false;
|
||||
};
|
||||
bridgeFDBs = [
|
||||
{
|
||||
MACAddress = "00:00:00:00:00:00";
|
||||
Destination = "fd0a:66d3:1c19:1000::2";
|
||||
VNI = 42;
|
||||
}
|
||||
{
|
||||
MACAddress = "00:00:00:00:00:00";
|
||||
Destination = "fd0a:66d3:1c19:1000::3";
|
||||
VNI = 42;
|
||||
}
|
||||
{
|
||||
MACAddress = "00:00:00:00:00:00";
|
||||
Destination = "fd0a:66d3:1c19:1000::4";
|
||||
VNI = 42;
|
||||
}
|
||||
];
|
||||
};
|
||||
"10-wg-infra" = {
|
||||
matchConfig.Name = "wg-infra";
|
||||
networkConfig = {
|
||||
Address = "fd0a:66d3:1c19:1000::1/64";
|
||||
VXLAN = "vxl-infra";
|
||||
};
|
||||
};
|
||||
infra.hub = {
|
||||
privateKeyPath = config.age.secrets.vogon-wg-infra-key.path;
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue