PostgreSQL Full-Text Search: tsvector, tsquery, and Ranking

PostgreSQL includes a powerful full-text search engine built on inverted indexes. It handles stemming, stop words, ranking, phrase search, and faceted filtering — without an external search service. F

Introduction#

PostgreSQL includes a powerful full-text search engine built on inverted indexes. It handles stemming, stop words, ranking, phrase search, and faceted filtering — without an external search service. For many applications, PostgreSQL full-text search is sufficient, and avoiding Elasticsearch means fewer systems to operate.

Core Concepts#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- tsvector: preprocessed document representation
-- Tokens are normalized (stemmed), stop words removed
SELECT to_tsvector('english', 'The quick brown foxes jumped over the lazy dogs');
-- 'brown':3 'dog':9 'fox':4 'jump':5 'lazi':8 'quick':2
-- Note: "The", "over" removed (stop words); "foxes"→"fox", "jumped"→"jump"

-- tsquery: search query
SELECT to_tsquery('english', 'quick & fox');       -- AND
SELECT to_tsquery('english', 'quick | slow');       -- OR
SELECT to_tsquery('english', '!lazy');              -- NOT
SELECT plainto_tsquery('english', 'quick fox');     -- implicit AND (user input)
SELECT phraseto_tsquery('english', 'quick brown');  -- phrase match

-- Match check
SELECT to_tsvector('english', 'The quick brown fox') @@ to_tsquery('english', 'fox');
-- true

Full-Text Search Table Setup#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
-- Articles table with full-text search
CREATE TABLE articles (
    id          BIGSERIAL PRIMARY KEY,
    title       TEXT NOT NULL,
    body        TEXT NOT NULL,
    tags        TEXT[],
    author_id   BIGINT,
    created_at  TIMESTAMPTZ DEFAULT NOW(),

    -- Stored tsvector: updated by trigger, indexed for fast search
    search_vector tsvector GENERATED ALWAYS AS (
        setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(body, '')), 'B')
    ) STORED
);

-- GIN index on the tsvector column
CREATE INDEX ON articles USING GIN (search_vector);

-- Also useful: trigram index for LIKE/ILIKE on short strings
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX ON articles USING GIN (title gin_trgm_ops);

Basic Search Query#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
-- Simple search
SELECT id, title, created_at
FROM articles
WHERE search_vector @@ plainto_tsquery('english', 'kubernetes deployment')
ORDER BY created_at DESC
LIMIT 20;

-- With ranking: ts_rank considers term frequency and position
SELECT
    id,
    title,
    ts_rank(search_vector, query) AS rank
FROM
    articles,
    plainto_tsquery('english', 'kubernetes deployment') AS query
WHERE
    search_vector @@ query
ORDER BY rank DESC
LIMIT 10;

-- ts_rank_cd: considers cover density (term proximity)
SELECT
    id,
    title,
    ts_rank_cd(search_vector, query) AS rank
FROM
    articles,
    plainto_tsquery('english', 'kubernetes deployment') AS query
WHERE
    search_vector @@ query
ORDER BY rank DESC;

Search Highlighting#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
-- ts_headline: highlight matching terms in result
SELECT
    id,
    ts_headline(
        'english',
        title,
        to_tsquery('english', 'kubernetes'),
        'MaxWords=10, MinWords=5, ShortWord=3, StartSel=<mark>, StopSel=</mark>'
    ) AS highlighted_title,
    ts_headline(
        'english',
        left(body, 500),  -- don't pass the whole article
        to_tsquery('english', 'kubernetes'),
        'MaxFragments=2, MaxWords=30, StartSel=<mark>, StopSel=</mark>'
    ) AS excerpt
FROM articles
WHERE search_vector @@ to_tsquery('english', 'kubernetes')
LIMIT 5;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- Phrase search: "container orchestration" as a phrase
SELECT title FROM articles
WHERE search_vector @@ phraseto_tsquery('english', 'container orchestration');

-- Proximity: "kubernetes" within 3 words of "scaling"
SELECT title FROM articles
WHERE search_vector @@ to_tsquery('english', 'kubernetes <3> scaling');

-- Complex query: (kubernetes OR k8s) AND (deploy OR scale) AND NOT beginner
SELECT title FROM articles
WHERE search_vector @@
    to_tsquery('english', '(kubernetes | k8s) & (deploy | scale) & !beginner');

-- Handle user input safely (plainto_tsquery is safe for arbitrary text)
SELECT title FROM articles
WHERE search_vector @@ plainto_tsquery('english', user_search_input);

Python Integration#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import psycopg2
from psycopg2.extras import RealDictCursor

conn = psycopg2.connect("postgresql://localhost/blog")

def search_articles(
    query: str,
    limit: int = 10,
    offset: int = 0,
    tags: list[str] | None = None,
) -> list[dict]:
    """Full-text search with optional tag filtering."""
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        # Use plainto_tsquery for user input (safe, no special characters needed)
        sql = """
            SELECT
                a.id,
                a.title,
                a.created_at,
                ts_rank_cd(a.search_vector, query) AS rank,
                ts_headline(
                    'english', a.body,
                    query,
                    'MaxFragments=1, MaxWords=25, StartSel=<b>, StopSel=</b>'
                ) AS excerpt
            FROM
                articles a,
                plainto_tsquery('english', %s) AS query
            WHERE
                a.search_vector @@ query
        """
        params = [query]

        if tags:
            sql += " AND a.tags && %s"  # array overlap
            params.append(tags)

        sql += " ORDER BY rank DESC LIMIT %s OFFSET %s"
        params.extend([limit, offset])

        cur.execute(sql, params)
        return cur.fetchall()

def suggest_completions(prefix: str, limit: int = 5) -> list[str]:
    """Autocomplete using trigram similarity."""
    with conn.cursor() as cur:
        cur.execute("""
            SELECT DISTINCT title
            FROM articles
            WHERE title ILIKE %s
            ORDER BY title
            LIMIT %s
        """, (f"%{prefix}%", limit))
        return [row[0] for row in cur.fetchall()]

# Usage
results = search_articles("kubernetes horizontal pod autoscaler")
for r in results:
    print(f"{r['rank']:.3f}{r['title']}")
    print(f"  {r['excerpt']}\n")

Search Configuration for Multiple Languages#

1
2
3
4
5
6
7
8
9
10
-- Custom text search configuration
CREATE TEXT SEARCH CONFIGURATION english_custom (COPY = english);

-- Synonym dictionary: "k8s" expands to "kubernetes"
-- CREATE TEXT SEARCH DICTIONARY k8s_synonyms (TEMPLATE=synonym, SYNONYMS=k8s)
-- ALTER TEXT SEARCH CONFIGURATION english_custom
--     ALTER MAPPING FOR asciiword WITH k8s_synonyms, english_stem;

-- Check what a query produces with your config
SELECT to_tsvector('english_custom', 'k8s pods are containers');

Indexing Strategy#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- For large tables: partial index on recent content
CREATE INDEX articles_fts_recent ON articles
    USING GIN (search_vector)
    WHERE created_at > NOW() - INTERVAL '1 year';

-- Maintain search_vector with a trigger (for non-GENERATED columns)
CREATE OR REPLACE FUNCTION articles_search_vector_trigger()
RETURNS trigger AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(NEW.body, '')), 'B');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER articles_search_vector_update
    BEFORE INSERT OR UPDATE ON articles
    FOR EACH ROW EXECUTE FUNCTION articles_search_vector_trigger();

Conclusion#

PostgreSQL full-text search handles the majority of search use cases without Elasticsearch. Stored tsvector columns with GIN indexes deliver sub-10ms search on millions of rows. Weight A for titles (highly relevant) and weight B for body text creates quality ranking without tuning. ts_headline provides snippet extraction with highlighted terms for good UX. Add trigram indexes (pg_trgm) for autocomplete and ILIKE queries. Reach for Elasticsearch when you need distributed search across multiple shards, very advanced relevance tuning, or search as a first-class operational concern — otherwise PostgreSQL’s built-in FTS is the simpler, lower-overhead choice.

Contents