remove unnecessary auth, and automate slave<->master first synchronisation

This commit is contained in:
asyncnomi 2025-08-02 19:46:25 +02:00
parent a8b2476288
commit fd3c1e8537
3 changed files with 46 additions and 57 deletions

View file

@ -1,11 +1,3 @@
## Note: This file won't set up the replication by itself.
## Once deployed, follow these instructions to start the streaming replication.
## Execute on each slave (replace the major version and IP address accordingly):
# 1. sudo systemctl stop postgresql.service
# 2. sudo rm -r /var/lib/postgresql/17
# 3. pg_basebackup -h 172.19.1.2 -U replication -p 5432 -D /var/lib/postgresql/17 -P -Xs
# 4. sudo systemctl restart postgresql.service
{ config, pkgs, lib, ... }:
let
@ -23,16 +15,6 @@ let
masterIP = "172.19.${toString masterNode.zone}.${toString masterNode.id}";
in
{
age.secrets."repli" = {
file = ./../../secrets/db/repli.age;
owner = "postgres";
group = "postgres";
};
systemd.services.postgresql.environment = lib.mkIf (builtins.elem myName mapping.db.slaves) {
# Currently unused because the password is manually set during basebackup
PGPASSFILE = "${config.age.secrets.repli.path}";
};
services.postgresql = {
enable = true;
# Force postgres package major version
@ -43,16 +25,19 @@ in
superuser_map root postgres
superuser_map postgres postgres
'';
# Replication tasks are not authenticated
# The wireguard mesh cryptographically
# ensures the sender is who we expect.
authentication = lib.mkForce (builtins.concatStringsSep "\n" ([''
#type database DBuser auth-method optional_ident_map
local all all peer map=superuser_map
'']
++ lib.optionals (myName == mapping.db.master)
(map (slaveName: let slaveNode = nodes.${slaveName}; in
"host replication replication 172.19.${toString slaveNode.zone}.${toString slaveNode.id}/32 md5"
"host replication replication 172.19.${toString slaveNode.zone}.${toString slaveNode.id}/32 trust"
) mapping.db.slaves)
++ lib.optionals (builtins.elem myName mapping.db.slaves) [
"host replication replication ${masterIP}/32 md5"
"host replication replication ${masterIP}/32 trust"
]));
ensureUsers = lib.mkIf (myName == mapping.db.master) [{
name = "replication";
@ -65,30 +50,53 @@ in
log_statement = "none";
logging_collector = true;
log_disconnections = true;
} // lib.optionalAttrs (myName == mapping.db.master) {
wal_level = "logical";
wal_sender_timeout = "60s";
max_wal_senders = 16;
wal_level = "logical";
} // lib.optionalAttrs (myName == mapping.db.master) {
wal_sender_timeout = "60s";
wal_keep_size = 1000; # In MB
} // lib.optionalAttrs (builtins.elem myName mapping.db.slaves) {
wal_level = "logical";
wal_receiver_timeout = "60s";
max_wal_senders = 16;
# Should be overridden by the postgresql.auto.conf generated by pg_basebackup
primary_conninfo = "host=${masterIP} port=5432 user=replication";
hot_standby = "on";
primary_conninfo = "host=${masterIP} port=5432 user=replication";
};
};
# The password looks like: "*:*:*:*:<password>"
# Cf: https://www.postgresql.org/docs/current/libpq-pgpass.html
systemd.services.postgresql.postStart = lib.mkIf (myName == mapping.db.master) ''
$PSQL -tA <<'EOF'
DO $$
DECLARE password TEXT;
BEGIN
password := trim(both from split_part(replace(pg_read_file('${config.age.secrets.repli.path}'), E'\n', '''), ':', 5));
EXECUTE format('ALTER USER replication WITH PASSWORD '''%s''';', password);
END $$;
EOF
# This preStart script syncs the slaves to the master.
# systemd.services.<name>.preStart has a mergeable type
systemd.services.postgresql.preStart = lib.mkIf (builtins.elem myName mapping.db.slaves) ''
  # Bootstrap this slave from the master with pg_basebackup.
  # The sync only runs while the .first_startup marker exists in the data dir.
  # NOTE(review): presumably that marker is created by the stock NixOS
  # postgresql preStart right after initdb, and this fragment is merged
  # after it -- confirm the merge order.
  if test -e "${cfg.dataDir}/.first_startup"; then
    echo "Setting up PostgreSQL slave replication..."

    # postgresql.conf inside the data dir is a symlink (assumed to be set up
    # by the default preStart script -- TODO confirm). pg_basebackup needs an
    # empty data dir, so remember the symlink target now and restore it once
    # the backup is done; there is no easy way known to get the store path
    # of that file without recomputing it.
    PSQL_CONF_PATH="${cfg.dataDir}/postgresql.conf"
    PSQL_CONF_TARGET=$(readlink "$PSQL_CONF_PATH")

    # Remove the data dir so pg_basebackup can recreate it from scratch
    if [ -d "${cfg.dataDir}" ]; then
      echo "Deleting postgres data dir: ${cfg.dataDir}"
      rm -rf "${cfg.dataDir}"
    fi

    # Perform the base backup from the master:
    #   -P   show progress
    #   -Xs  stream the WAL while the backup is taken
    #   -R   write standby.signal and the connection settings
    #        into postgresql.auto.conf
    echo "Starting base backup from master: ${masterIP}"
    ${cfg.package}/bin/pg_basebackup \
      -h "${masterIP}" \
      -U replication \
      -p ${toString cfg.settings.port} \
      -D "${cfg.dataDir}" \
      -P \
      -Xs \
      -R

    # Symlink the postgresql.conf back into the fresh data dir
    ln -sf "$PSQL_CONF_TARGET" "$PSQL_CONF_PATH"
    echo "PostgreSQL slave setup completed successfully"
  else
    # The marker is gone, so this is not a first startup.
    echo "PostgreSQL standby already configured (no .first_startup marker found), skipping base backup"
  fi
'';
}