Trie Data Structure: Prefix Search and Autocomplete

A trie (prefix tree) stores strings character by character, sharing prefixes between words. This makes prefix-based operations — autocomplete, spell checking, IP routing, and dictionary lookups — extr

Introduction#

A trie (prefix tree) stores strings character by character, sharing prefixes between words. This makes prefix-based operations — autocomplete, spell checking, IP routing, and dictionary lookups — extremely efficient. Where a hash table lookup requires the full key, a trie answers “all words starting with ‘pre’” in O(k) time where k is the prefix length.

Core Trie Implementation#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from __future__ import annotations
from typing import Optional, Iterator

class TrieNode:
    __slots__ = ("children", "is_end", "count", "value")

    def __init__(self):
        self.children: dict[str, TrieNode] = {}
        self.is_end: bool = False
        self.count: int = 0      # words passing through this node
        self.value: object = None  # optional associated value

class Trie:
    def __init__(self):
        self._root = TrieNode()
        self._size = 0

    def insert(self, word: str, value: object = None) -> None:
        node = self._root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
            node.count += 1

        if not node.is_end:
            self._size += 1
        node.is_end = True
        node.value = value

    def search(self, word: str) -> Optional[object]:
        """Returns value if word exists, None otherwise."""
        node = self._find_node(word)
        if node and node.is_end:
            return node.value
        return None

    def starts_with(self, prefix: str) -> bool:
        """Returns True if any word starts with prefix."""
        return self._find_node(prefix) is not None

    def _find_node(self, prefix: str) -> Optional[TrieNode]:
        node = self._root
        for char in prefix:
            if char not in node.children:
                return None
            node = node.children[char]
        return node

    def autocomplete(self, prefix: str, limit: int = 10) -> list[str]:
        """Return all words with the given prefix."""
        node = self._find_node(prefix)
        if node is None:
            return []

        results = []
        self._collect(node, prefix, results, limit)
        return results

    def _collect(
        self,
        node: TrieNode,
        current: str,
        results: list[str],
        limit: int,
    ) -> None:
        if len(results) >= limit:
            return
        if node.is_end:
            results.append(current)
        for char, child in sorted(node.children.items()):
            self._collect(child, current + char, results, limit)

    def delete(self, word: str) -> bool:
        """Delete a word. Returns True if word existed."""
        return self._delete(self._root, word, 0)

    def _delete(self, node: TrieNode, word: str, depth: int) -> bool:
        if depth == len(word):
            if not node.is_end:
                return False
            node.is_end = False
            node.value = None
            self._size -= 1
            return True

        char = word[depth]
        if char not in node.children:
            return False

        deleted = self._delete(node.children[char], word, depth + 1)
        if deleted:
            node.children[char].count -= 1
            if node.children[char].count == 0:
                del node.children[char]  # prune empty branch
        return deleted

    def __len__(self) -> int:
        return self._size

    def __contains__(self, word: str) -> bool:
        return self.search(word) is not None

Autocomplete with Frequency Ranking#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import heapq

class FrequencyTrie:
    """Trie that ranks autocomplete results by frequency."""

    def __init__(self):
        self._root = TrieNode()
        self._frequencies: dict[str, int] = {}

    def insert(self, word: str, frequency: int = 1) -> None:
        self._frequencies[word] = self._frequencies.get(word, 0) + frequency
        node = self._root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end = True

    def autocomplete(self, prefix: str, limit: int = 5) -> list[tuple[str, int]]:
        """Return top-k words by frequency with the given prefix."""
        node_prefix = self._root
        for char in prefix:
            if char not in node_prefix.children:
                return []
            node_prefix = node_prefix.children[char]

        # Collect all words with this prefix
        candidates = []
        self._collect_words(node_prefix, prefix, candidates)

        # Return top-k by frequency
        return heapq.nlargest(limit, candidates, key=lambda x: x[1])

    def _collect_words(
        self,
        node: TrieNode,
        current: str,
        results: list[tuple[str, int]],
    ) -> None:
        if node.is_end:
            freq = self._frequencies.get(current, 0)
            results.append((current, freq))
        for char, child in node.children.items():
            self._collect_words(child, current + char, results)

# Usage: search engine autocomplete
trie = FrequencyTrie()
searches = [
    ("python", 5000), ("python tutorial", 3000), ("python list comprehension", 1500),
    ("python decorator", 800), ("pytorch", 4000), ("programming", 2000),
    ("program", 1500), ("promise", 500),
]
for word, freq in searches:
    trie.insert(word, freq)

print(trie.autocomplete("python"))
# [('python', 5000), ('pytorch', 4000), ('python tutorial', 3000), ...]

print(trie.autocomplete("pro"))
# [('programming', 2000), ('program', 1500), ...]

Compact Trie (Radix Tree)#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class RadixNode:
    """Node in a compressed trie (radix tree)."""

    def __init__(self, prefix: str = ""):
        self.prefix = prefix
        self.children: dict[str, RadixNode] = {}
        self.is_end = False

class RadixTree:
    """Radix tree: compressed trie that merges single-child chains."""

    def __init__(self):
        self._root = RadixNode()

    def insert(self, word: str) -> None:
        self._insert(self._root, word)

    def _insert(self, node: RadixNode, remaining: str) -> None:
        for key, child in node.children.items():
            common = self._common_prefix(key, remaining)
            if not common:
                continue

            if common == key:
                # Traverse into existing node
                self._insert(child, remaining[len(common):])
                return

            # Split the existing edge
            new_child = RadixNode(key[len(common):])
            new_child.children = child.children
            new_child.is_end = child.is_end

            child.prefix = common
            child.children = {key[len(common):]: new_child}
            child.is_end = False

            suffix = remaining[len(common):]
            if suffix:
                leaf = RadixNode(suffix)
                leaf.is_end = True
                child.children[suffix] = leaf
            else:
                child.is_end = True

            del node.children[key]
            node.children[common] = child
            return

        # No matching prefix, add new child
        leaf = RadixNode(remaining)
        leaf.is_end = True
        node.children[remaining] = leaf

    @staticmethod
    def _common_prefix(a: str, b: str) -> str:
        i = 0
        while i < len(a) and i < len(b) and a[i] == b[i]:
            i += 1
        return a[:i]

IP Routing with Tries#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Longest prefix match for IP routing
class IPRoutingTrie:
    """Trie for IP prefix matching (simplified, binary trie on IP bits)."""

    def __init__(self):
        self._root = [None, None, None]  # [left, right, next_hop]

    def add_route(self, prefix: str, prefix_len: int, next_hop: str) -> None:
        """Add a route: prefix is the IP string, prefix_len is the mask length."""
        ip_int = self._ip_to_int(prefix)
        node = self._root

        for bit_pos in range(31, 31 - prefix_len, -1):
            bit = (ip_int >> bit_pos) & 1
            if node[bit] is None:
                node[bit] = [None, None, None]
            node = node[bit]

        node[2] = next_hop  # store next hop at leaf

    def lookup(self, ip: str) -> Optional[str]:
        """Find the most specific route for the given IP."""
        ip_int = self._ip_to_int(ip)
        node = self._root
        best_hop = node[2]

        for bit_pos in range(31, -1, -1):
            bit = (ip_int >> bit_pos) & 1
            if node[bit] is None:
                break
            node = node[bit]
            if node[2] is not None:
                best_hop = node[2]  # update best match

        return best_hop

    @staticmethod
    def _ip_to_int(ip: str) -> int:
        parts = ip.split(".")
        result = 0
        for part in parts:
            result = (result << 8) | int(part)
        return result

Word Search with Trie (Backtracking)#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def find_words_in_board(board: list[list[str]], words: list[str]) -> list[str]:
    """Find all words from the list that exist in the 2D board (LeetCode 212)."""
    trie = Trie()
    for word in words:
        trie.insert(word)

    rows, cols = len(board), len(board[0])
    found = set()

    def backtrack(node: TrieNode, r: int, c: int, path: str) -> None:
        char = board[r][c]
        if char not in node.children:
            return

        next_node = node.children[char]
        path += char

        if next_node.is_end:
            found.add(path)

        board[r][c] = "#"  # mark visited
        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nr, nc = r + dr, c + dc
            if 0 <= nr < rows and 0 <= nc < cols and board[nr][nc] != "#":
                backtrack(next_node, nr, nc, path)
        board[r][c] = char  # restore

    for r in range(rows):
        for c in range(cols):
            backtrack(trie._root, r, c, "")

    return list(found)

Complexity Summary#

1
2
3
4
5
6
7
8
9
10
Operation              | Time     | Space
-----------------------|----------|-------
Insert word (length k) | O(k)     | O(k × alphabet_size)
Search word (length k) | O(k)     | O(1)
Prefix search          | O(p + n) | O(1) — p=prefix len, n=matches
Delete word            | O(k)     | O(1)
Autocomplete           | O(p + n) | O(n)

Space: O(total_characters × alphabet_size)
Compact trie (radix tree) reduces space for sparse datasets

Conclusion#

Tries outperform hash tables for prefix operations and ordered iteration. They are used in search engine autocomplete, DNS resolution, IP routing tables, spell checkers, and phone number lookups. The compressed variant (radix tree) reduces memory when the key set is sparse. When you need to answer “all strings starting with X” efficiently, a trie is the right structure. For simple exact-match lookups, a hash table remains more space-efficient.

Contents