Introduction
TimescaleDB is a PostgreSQL extension that adds time-series optimization through hypertables — automatic time-based partitioning with compression and data retention policies. It enables storing millions of metrics, IoT readings, and events while maintaining fast queries, all within standard PostgreSQL. You keep familiar SQL, indexes, and tooling while gaining time-series performance.
Hypertables and Setup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
-- Install TimescaleDB extension
-- (the library must also be listed in shared_preload_libraries)
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- Create a regular table first
CREATE TABLE metrics (
time TIMESTAMPTZ NOT NULL,
device_id TEXT NOT NULL,
metric_name TEXT NOT NULL,
value DOUBLE PRECISION,
tags JSONB
);
-- Convert to hypertable: TimescaleDB partitions automatically by time
SELECT create_hypertable('metrics', 'time', chunk_time_interval => INTERVAL '1 day');
-- Each chunk = one day's worth of data (configurable)
-- Queries on recent data only scan recent chunks
-- Composite index (NOT a primary key) for the typical access path:
-- filter by device and metric, scan newest readings first
CREATE INDEX ON metrics (device_id, metric_name, time DESC);
-- GIN index enables containment queries on tags, e.g. tags @> '{"location": "room-1"}'
CREATE INDEX ON metrics USING GIN (tags);
-- Insert data — works like normal SQL
INSERT INTO metrics (time, device_id, metric_name, value)
VALUES
(NOW(), 'sensor-001', 'temperature', 23.5),
(NOW(), 'sensor-001', 'humidity', 67.2),
(NOW() - INTERVAL '1 hour', 'sensor-002', 'temperature', 21.8);
Common Time-Series Queries
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
-- Last value per device (latest reading)
SELECT DISTINCT ON (device_id)
    device_id,
    time,
    value
FROM metrics
WHERE metric_name = 'temperature'
ORDER BY device_id, time DESC;

-- Downsampled averages: 5-minute buckets
SELECT
    time_bucket('5 minutes', time) AS bucket,
    device_id,
    AVG(value) AS avg_temp,
    MIN(value) AS min_temp,
    MAX(value) AS max_temp
FROM metrics
WHERE metric_name = 'temperature'
    AND time > NOW() - INTERVAL '1 hour'
GROUP BY bucket, device_id
ORDER BY bucket DESC;

-- Gap filling: emit one row per bucket, NULL where no data arrived.
-- time_bucket_gapfill needs BOTH a start and a finish bound; with only a
-- lower time bound in WHERE it cannot infer the range and raises an error,
-- so the upper bound below is required.
SELECT
    time_bucket_gapfill('1 minute', time) AS bucket,
    device_id,
    AVG(value) AS avg_temp,
    locf(last(value, time)) AS last_temp -- carry last observed value across gaps
FROM metrics
WHERE metric_name = 'temperature'
    AND time > NOW() - INTERVAL '30 minutes'
    AND time < NOW() -- upper bound lets gapfill infer its finish
    AND device_id = 'sensor-001'
GROUP BY bucket, device_id
ORDER BY bucket;

-- Moving average (7-point window: 3 before, current row, 3 after)
SELECT
    time,
    device_id,
    value,
    AVG(value) OVER (
        PARTITION BY device_id
        ORDER BY time
        ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
    ) AS moving_avg
FROM metrics
WHERE metric_name = 'temperature'
    AND time > NOW() - INTERVAL '2 hours'
ORDER BY time;
Continuous Aggregates
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- Continuous aggregates: automatically maintained materialized views
-- Pre-aggregate data for fast dashboard queries
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 hour', time) AS bucket,
    device_id,
    metric_name,
    AVG(value) AS avg_value,
    MIN(value) AS min_value,
    MAX(value) AS max_value,
    COUNT(*) AS sample_count
FROM metrics
GROUP BY bucket, device_id, metric_name
WITH NO DATA; -- don't backfill yet; the policy below will materialize

-- Set refresh policy: keep aggregate up to date automatically
-- (refreshes buckets between 3h and 1h old, every hour)
SELECT add_continuous_aggregate_policy('hourly_metrics',
    start_offset => INTERVAL '3 hours',
    end_offset => INTERVAL '1 hour',
    schedule_interval => INTERVAL '1 hour'
);

-- Query the continuous aggregate (much faster than raw data for large ranges)
SELECT
    bucket,
    device_id,
    avg_value
FROM hourly_metrics
WHERE metric_name = 'temperature'
    AND bucket > NOW() - INTERVAL '24 hours'
ORDER BY bucket DESC;

-- Real-time aggregation: querying the VIEW (not the raw hypertable)
-- transparently merges materialized buckets with not-yet-materialized
-- recent raw data. Depending on your TimescaleDB version you may need:
--   ALTER MATERIALIZED VIEW hourly_metrics
--       SET (timescaledb.materialized_only = false);
SELECT
    bucket,
    device_id,
    avg_value
FROM hourly_metrics
WHERE metric_name = 'temperature'
    AND bucket > NOW() - INTERVAL '7 days'
ORDER BY bucket DESC;
Compression
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
-- Compress chunks older than 7 days (60-90% storage reduction for time-series)
ALTER TABLE metrics SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'device_id, metric_name',
    timescaledb.compress_orderby = 'time DESC'
);

-- Automatic compression policy
SELECT add_compression_policy('metrics', INTERVAL '7 days');

-- Check compression status.
-- chunk_compression_stats() reports before/after byte counts but has no
-- ready-made ratio column, so compute it; NULLIF guards chunks that report
-- zero compressed bytes (e.g. not yet compressed).
SELECT
    chunk_schema,
    chunk_name,
    before_compression_total_bytes / (1024 * 1024) AS before_mb,
    after_compression_total_bytes / (1024 * 1024) AS after_mb,
    ROUND(
        before_compression_total_bytes::NUMERIC
            / NULLIF(after_compression_total_bytes, 0),
        2
    ) AS compression_ratio
FROM chunk_compression_stats('metrics')
ORDER BY chunk_name;

-- Decompress a chunk (for backfill or debugging)
SELECT decompress_chunk('_timescaledb_internal._hyper_1_1_chunk');
Retention Policy
1
2
3
4
5
6
7
8
9
10
-- Automatically drop raw data older than 90 days
SELECT add_retention_policy('metrics', INTERVAL '90 days');

-- To change retention later, REPLACE the policy: a hypertable has at most
-- one retention policy, so calling add_retention_policy again would error.
SELECT remove_retention_policy('metrics');
SELECT add_retention_policy('metrics', INTERVAL '30 days');
-- Continuous aggregates store their own data, so hourly_metrics can outlive
-- the raw rows it was built from.

-- Custom: drop old chunks manually (one-off operation, bypasses the policy)
SELECT drop_chunks('metrics', older_than => INTERVAL '90 days');
Python Integration
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# psycopg2: synchronous PostgreSQL driver; execute_values batches inserts
import psycopg2
from psycopg2.extras import execute_values
from datetime import datetime, timedelta
import time
# Module-level shared connection.
# NOTE(review): psycopg2 connections are not safe for concurrent use from
# multiple threads without care — confirm single-threaded usage.
conn = psycopg2.connect("postgresql://localhost/metrics_db")
def batch_insert_metrics(readings: list[dict]) -> None:
    """Efficiently bulk-insert time-series data.

    Each reading dict must have keys: "time", "device_id", "metric",
    "value", and optionally "tags" (a dict serialized to JSONB).

    Raises:
        psycopg2.Error: on insert failure (the transaction is rolled back
        so the shared connection remains usable).
    """
    # Plain Python dicts are not adaptable by psycopg2; wrap tags in Json
    # so they bind to the JSONB column instead of raising "can't adapt".
    from psycopg2.extras import Json

    rows = [
        (
            r["time"],
            r["device_id"],
            r["metric"],
            r["value"],
            Json(r["tags"]) if r.get("tags") is not None else None,
        )
        for r in readings
    ]
    try:
        with conn.cursor() as cur:
            execute_values(cur, """
                INSERT INTO metrics (time, device_id, metric_name, value, tags)
                VALUES %s
            """, rows)
        conn.commit()
    except Exception:
        # Leave the connection out of the aborted-transaction state.
        conn.rollback()
        raise
def get_device_summary(device_id: str, hours: int = 24) -> list[dict]:
    """Get hourly min/avg/max per metric for one device.

    Args:
        device_id: device to summarize.
        hours: look-back window size in hours.

    Returns:
        One dict per (hour, metric_name), newest hour first.
    """
    with conn.cursor() as cur:
        # make_interval keeps `hours` a true bound parameter; interpolating
        # %s inside an INTERVAL '...' string literal only works by accident
        # and breaks for non-integer input.
        cur.execute("""
            SELECT
                time_bucket('1 hour', time) AS hour,
                metric_name,
                AVG(value) AS avg_value,
                MIN(value) AS min_value,
                MAX(value) AS max_value
            FROM metrics
            WHERE device_id = %s
              AND time > NOW() - make_interval(hours => %s)
            GROUP BY hour, metric_name
            ORDER BY hour DESC, metric_name
        """, (device_id, hours))
        columns = [d[0] for d in cur.description]
        return [dict(zip(columns, row)) for row in cur.fetchall()]
# Simulate IoT sensor ingestion
import random

def simulate_sensors(num_sensors: int = 10, interval_sec: float = 1.0) -> None:
    """Insert one batch of synthetic temperature readings per interval.

    Runs forever until interrupted (Ctrl-C); intended for local load testing.

    Args:
        num_sensors: number of simulated devices per batch.
        interval_sec: sleep between batches, in seconds.
    """
    from datetime import timezone  # module top imports datetime/timedelta only

    while True:
        # Timezone-aware UTC timestamp: the column is TIMESTAMPTZ, and a
        # naive utcnow() would be reinterpreted in the session's time zone.
        # Computed once per batch (loop-invariant within the comprehension).
        now = datetime.now(timezone.utc)
        readings = [
            {
                "time": now,
                "device_id": f"sensor-{i:03d}",
                "metric": "temperature",
                "value": 20 + random.gauss(0, 2),  # ~N(20 C, sd 2)
                "tags": {"location": f"room-{i % 5 + 1}"},
            }
            for i in range(num_sensors)
        ]
        batch_insert_metrics(readings)
        time.sleep(interval_sec)
Conclusion
TimescaleDB delivers time-series performance within standard PostgreSQL by automatically partitioning data into time-based chunks. Continuous aggregates pre-compute expensive aggregations and stay current automatically. Compression reduces storage by 60-90% for older data without losing queryability. Retention policies automatically drop old data. The result is a production-ready time-series database that scales to billions of rows while staying compatible with every PostgreSQL tool, driver, and ecosystem component you already use.