Introduction#
PostgreSQL replication creates standby copies of your database for high availability, read scaling, and disaster recovery. Physical (streaming) replication copies byte-for-byte changes to the entire cluster. Logical replication selectively copies changes to specific tables and supports cross-version and cross-platform replication. Understanding both and their failover mechanisms is essential for production database operations.
Streaming Replication (Physical)#
| # Primary server: postgresql.conf
wal_level = replica # minimum WAL detail needed for streaming replication ('logical' for logical replication)
max_wal_senders = 5 # max concurrent WAL sender processes (one per standby or base backup)
wal_keep_size = 1GB # retain this much WAL for standbys that fall behind (replication slots give a stronger guarantee)
synchronous_commit = on # local commit durability; replication stays asynchronous unless synchronous_standby_names is also set
# Primary: pg_hba.conf — allow replication connections
# host replication replicator 192.168.1.0/24 scram-sha-256
# Create replication user on primary (run in psql)
CREATE USER replicator REPLICATION LOGIN PASSWORD 'strongpassword';
# Initialize standby from primary using pg_basebackup
# --write-recovery-conf creates standby.signal and writes primary_conninfo for you
pg_basebackup \
--host=primary-host \
--port=5432 \
--username=replicator \
--pgdata=/var/lib/postgresql/data \
--wal-method=stream \
--write-recovery-conf \
--progress \
--verbose
# Standby: postgresql.conf
hot_standby = on # allow read-only queries while in recovery (on the standby)
primary_conninfo = 'host=primary-host port=5432 user=replicator password=strongpassword'
|
Monitoring Replication Lag#
| -- On primary: check replication status
-- pg_stat_replication has one row per connected WAL sender (i.e. per standby)
SELECT
client_addr,
state, -- 'streaming' in steady state; 'catchup' while a standby catches up
sent_lsn,
write_lsn,
flush_lsn,
replay_lsn,
write_lag, -- time-based lag columns are populated from standby status messages
flush_lag,
replay_lag
FROM pg_stat_replication;
-- Lag in bytes: how far the standby's replay position trails what the primary has sent
SELECT
client_addr,
pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag_bytes
FROM pg_stat_replication;
-- On standby: check how far behind
-- NOTE: pg_last_xact_replay_timestamp() stops advancing when the primary is idle,
-- so this can overstate lag on a quiet system
SELECT
now() - pg_last_xact_replay_timestamp() AS replication_lag,
pg_is_in_recovery() AS is_standby, -- true while this server is a standby
pg_last_wal_replay_lsn() AS last_replayed_lsn;
|
Logical Replication#
| -- Logical replication: replicate specific tables, not the whole cluster
-- Use cases: zero-downtime major version upgrades, partial replication,
-- cross-database replication, CDC (change data capture)
-- NOTE: logical replication does not replicate DDL, sequences, or large objects;
-- the target tables must already exist on the subscriber with a compatible schema
-- PRIMARY: configure
-- postgresql.conf: wal_level = logical
-- Create publication on primary
-- (the role used for replication needs SELECT on the published tables)
CREATE PUBLICATION app_publication FOR TABLE users, orders, products;
-- SUBSCRIBER: create subscription pointing to primary
-- (by default this first copies the tables' existing data, then streams changes)
CREATE SUBSCRIPTION app_subscription
CONNECTION 'host=primary-host dbname=app user=replicator password=strongpassword'
PUBLICATION app_publication;
-- Monitor logical replication
SELECT
subname,
pid,
received_lsn,
latest_end_lsn,
latest_end_time
FROM pg_stat_subscription;
-- On primary: confirm publication is set up
SELECT pubname, puballtables FROM pg_publication;
SELECT * FROM pg_publication_tables;
|
Patroni: Automated Failover#
| # patroni.yml — high availability PostgreSQL cluster management
scope: postgres-cluster
name: node1

restapi:
  listen: 0.0.0.0:8008
  connect_address: 192.168.1.10:8008

etcd:
  hosts: 192.168.1.20:2379,192.168.1.21:2379,192.168.1.22:2379

bootstrap:
  dcs:
    ttl: 30           # leader key lifetime in the DCS; failover starts when it expires
    loop_wait: 10     # seconds between Patroni HA loop runs
    # Patroni requires ttl >= loop_wait + 2 * retry_timeout.
    # retry_timeout: 30 with ttl: 30 violates that constraint; 10 is the safe default.
    retry_timeout: 10
    maximum_lag_on_failover: 1048576 # 1MB — max replication lag for a failover candidate
  # pg_hba rules written at cluster bootstrap; tighten the CIDRs for production
  pg_hba:
    - host replication replicator 0.0.0.0/0 scram-sha-256
    - host all all 0.0.0.0/0 scram-sha-256

postgresql:
  listen: 0.0.0.0:5432
  connect_address: 192.168.1.10:5432
  data_dir: /var/lib/postgresql/data
  authentication:
    replication:
      username: replicator
      password: strongpassword
    superuser:
      username: postgres
      password: postgrespassword
  parameters:
    max_connections: 200
    wal_level: replica
    hot_standby: "on"
    wal_keep_size: "1GB"
|
HAProxy for Connection Routing#
# HAProxy routes writes to primary, reads to standbys.
# Patroni's REST API returns HTTP 200 on /primary only on the leader
# and on /replica only on healthy replicas, so the health check alone
# steers traffic — no 'backup' flags needed.
frontend postgres_write
    bind *:5432
    mode tcp
    default_backend postgres_primary

frontend postgres_read
    bind *:5433
    mode tcp
    default_backend postgres_standby

backend postgres_primary
    mode tcp
    # Health-check directives are backend-level in HAProxy, not per-server keywords.
    option httpchk GET /primary
    http-check expect status 200
    default-server inter 3s fall 3 rise 2
    server node1 192.168.1.10:5432 check port 8008
    server node2 192.168.1.11:5432 check port 8008
    server node3 192.168.1.12:5432 check port 8008

backend postgres_standby
    mode tcp
    balance roundrobin
    option httpchk GET /replica
    http-check expect status 200
    default-server inter 3s fall 3 rise 2
    server node1 192.168.1.10:5432 check port 8008
    server node2 192.168.1.11:5432 check port 8008
    server node3 192.168.1.12:5432 check port 8008
|
Point-in-Time Recovery (PITR)#
# Configure continuous archiving for PITR
# postgresql.conf (on the primary):
#   archive_mode = on
#   archive_command = 'aws s3 cp %p s3://my-wal-archive/%f'   # %p = path to WAL file, %f = file name
#   restore_command = 'aws s3 cp s3://my-wal-archive/%f %p'
# Restore to a specific time (e.g., 5 minutes before an accidental DELETE)
# 1. Stop PostgreSQL
# 2. Restore the base backup into the data directory
# 3. Configure the recovery target — APPEND ('>>') to postgresql.conf;
#    a single '>' would truncate the file and destroy the restored configuration
cat >> /var/lib/postgresql/data/postgresql.conf <<EOF
restore_command = 'aws s3 cp s3://my-wal-archive/%f %p'
recovery_target_time = '2025-11-15 14:35:00+00'
recovery_target_action = 'promote' # promote to primary when target reached
EOF
# 4. Create the recovery signal file (tells PostgreSQL to enter targeted recovery)
touch /var/lib/postgresql/data/recovery.signal
# 5. Start PostgreSQL — it will replay WAL until the target time, then promote
|
Conclusion#
Streaming replication provides real-time standby copies with sub-second lag for high availability and read scaling. Logical replication enables selective replication and is the standard approach for zero-downtime major version upgrades. Patroni with etcd/Consul automates failover so standby promotion happens in seconds without manual intervention. Continuous WAL archiving to S3 enables point-in-time recovery — the critical safety net for accidental data loss. For most production setups, use Patroni + HAProxy + streaming replication as the primary HA mechanism, with PITR archiving for disaster recovery.