Introduction#
PostgreSQL includes a powerful full-text search engine built on inverted indexes. It handles stemming, stop words, ranking, phrase search, and faceted filtering — without an external search service. For many applications, PostgreSQL full-text search is sufficient, and avoiding Elasticsearch means fewer systems to operate.
Core Concepts#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| -- tsvector: preprocessed document representation
-- Tokens are normalized (stemmed), stop words removed
SELECT to_tsvector('english', 'The quick brown foxes jumped over the lazy dogs');
-- 'brown':3 'dog':9 'fox':4 'jump':5 'lazi':8 'quick':2
-- Note: "The", "over" removed (stop words); "foxes"→"fox", "jumped"→"jump"
-- tsquery: search query
SELECT to_tsquery('english', 'quick & fox'); -- AND
SELECT to_tsquery('english', 'quick | slow'); -- OR
SELECT to_tsquery('english', '!lazy'); -- NOT
SELECT plainto_tsquery('english', 'quick fox'); -- implicit AND (user input)
SELECT phraseto_tsquery('english', 'quick brown'); -- phrase match
-- Match check
SELECT to_tsvector('english', 'The quick brown fox') @@ to_tsquery('english', 'fox');
-- true
|
Full-Text Search Table Setup#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| -- Articles table with full-text search
CREATE TABLE articles (
id BIGSERIAL PRIMARY KEY,
title TEXT NOT NULL,
body TEXT NOT NULL,
tags TEXT[],
author_id BIGINT,
created_at TIMESTAMPTZ DEFAULT NOW(),
-- Stored tsvector: updated by trigger, indexed for fast search
search_vector tsvector GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(body, '')), 'B')
) STORED
);
-- GIN index on the tsvector column
CREATE INDEX ON articles USING GIN (search_vector);
-- Also useful: trigram index for LIKE/ILIKE on short strings
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX ON articles USING GIN (title gin_trgm_ops);
|
Basic Search Query#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
| -- Simple search
SELECT id, title, created_at
FROM articles
WHERE search_vector @@ plainto_tsquery('english', 'kubernetes deployment')
ORDER BY created_at DESC
LIMIT 20;
-- With ranking: ts_rank considers term frequency and position
SELECT
id,
title,
ts_rank(search_vector, query) AS rank
FROM
articles,
plainto_tsquery('english', 'kubernetes deployment') AS query
WHERE
search_vector @@ query
ORDER BY rank DESC
LIMIT 10;
-- ts_rank_cd: considers cover density (term proximity)
SELECT
id,
title,
ts_rank_cd(search_vector, query) AS rank
FROM
articles,
plainto_tsquery('english', 'kubernetes deployment') AS query
WHERE
search_vector @@ query
ORDER BY rank DESC;
|
Search Highlighting#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| -- ts_headline: highlight matching terms in result
SELECT
id,
ts_headline(
'english',
title,
to_tsquery('english', 'kubernetes'),
'MaxWords=10, MinWords=5, ShortWord=3, StartSel=<mark>, StopSel=</mark>'
) AS highlighted_title,
ts_headline(
'english',
left(body, 500), -- don't pass the whole article
to_tsquery('english', 'kubernetes'),
'MaxFragments=2, MaxWords=30, StartSel=<mark>, StopSel=</mark>'
) AS excerpt
FROM articles
WHERE search_vector @@ to_tsquery('english', 'kubernetes')
LIMIT 5;
|
Advanced: Phrase and Proximity Search#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| -- Phrase search: "container orchestration" as a phrase
SELECT title FROM articles
WHERE search_vector @@ phraseto_tsquery('english', 'container orchestration');
-- Proximity: "kubernetes" within 3 words of "scaling"
SELECT title FROM articles
WHERE search_vector @@ to_tsquery('english', 'kubernetes <3> scaling');
-- Complex query: (kubernetes OR k8s) AND (deploy OR scale) AND NOT beginner
SELECT title FROM articles
WHERE search_vector @@
to_tsquery('english', '(kubernetes | k8s) & (deploy | scale) & !beginner');
-- Handle user input safely (plainto_tsquery is safe for arbitrary text)
SELECT title FROM articles
WHERE search_vector @@ plainto_tsquery('english', user_search_input);
|
Python Integration#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
| import psycopg2
from psycopg2.extras import RealDictCursor
conn = psycopg2.connect("postgresql://localhost/blog")
def search_articles(
query: str,
limit: int = 10,
offset: int = 0,
tags: list[str] | None = None,
) -> list[dict]:
"""Full-text search with optional tag filtering."""
with conn.cursor(cursor_factory=RealDictCursor) as cur:
# Use plainto_tsquery for user input (safe, no special characters needed)
sql = """
SELECT
a.id,
a.title,
a.created_at,
ts_rank_cd(a.search_vector, query) AS rank,
ts_headline(
'english', a.body,
query,
'MaxFragments=1, MaxWords=25, StartSel=<b>, StopSel=</b>'
) AS excerpt
FROM
articles a,
plainto_tsquery('english', %s) AS query
WHERE
a.search_vector @@ query
"""
params = [query]
if tags:
sql += " AND a.tags && %s" # array overlap
params.append(tags)
sql += " ORDER BY rank DESC LIMIT %s OFFSET %s"
params.extend([limit, offset])
cur.execute(sql, params)
return cur.fetchall()
def suggest_completions(prefix: str, limit: int = 5) -> list[str]:
"""Autocomplete using trigram similarity."""
with conn.cursor() as cur:
cur.execute("""
SELECT DISTINCT title
FROM articles
WHERE title ILIKE %s
ORDER BY title
LIMIT %s
""", (f"%{prefix}%", limit))
return [row[0] for row in cur.fetchall()]
# Usage
results = search_articles("kubernetes horizontal pod autoscaler")
for r in results:
print(f"{r['rank']:.3f} — {r['title']}")
print(f" {r['excerpt']}\n")
|
Search Configuration for Multiple Languages#
1
2
3
4
5
6
7
8
9
10
| -- Custom text search configuration
CREATE TEXT SEARCH CONFIGURATION english_custom (COPY = english);
-- Synonym dictionary: "k8s" expands to "kubernetes"
-- CREATE TEXT SEARCH DICTIONARY k8s_synonyms (TEMPLATE=synonym, SYNONYMS=k8s)
-- ALTER TEXT SEARCH CONFIGURATION english_custom
-- ALTER MAPPING FOR asciiword WITH k8s_synonyms, english_stem;
-- Check what a query produces with your config
SELECT to_tsvector('english_custom', 'k8s pods are containers');
|
Indexing Strategy#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
| -- For large tables: partial index on recent content
CREATE INDEX articles_fts_recent ON articles
USING GIN (search_vector)
WHERE created_at > NOW() - INTERVAL '1 year';
-- Maintain search_vector with a trigger (for non-GENERATED columns)
CREATE OR REPLACE FUNCTION articles_search_vector_trigger()
RETURNS trigger AS $$
BEGIN
NEW.search_vector :=
setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(NEW.body, '')), 'B');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER articles_search_vector_update
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION articles_search_vector_trigger();
|
Conclusion#
PostgreSQL full-text search handles the majority of search use cases without Elasticsearch. Stored tsvector columns with GIN indexes deliver sub-10ms search on millions of rows. Weight A for titles (highly relevant) and weight B for body text creates quality ranking without tuning. ts_headline provides snippet extraction with highlighted terms for good UX. Add trigram indexes (pg_trgm) for autocomplete and ILIKE queries. Reach for Elasticsearch when you need distributed search across multiple shards, very advanced relevance tuning, or search as a first-class operational concern — otherwise PostgreSQL’s built-in FTS is the simpler, lower-overhead choice.