Introduction#
In a distributed cache or database cluster, you need to route requests to the correct node. Naive modulo hashing (hash(key) % N) routes correctly, but when N changes (adding or removing a node), nearly all keys remap to different nodes — causing a cache miss storm. Consistent hashing solves this: when a node is added or removed, on average only about K/N keys need to remap, where K is the total number of keys and N is the number of nodes.
The Problem with Modulo Hashing#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| import hashlib
def naive_route(key: str, nodes: list[str]) -> str:
    """Route *key* to a node by taking its MD5 digest modulo the node count."""
    digest = hashlib.md5(key.encode()).hexdigest()
    bucket = int(digest, 16) % len(nodes)
    return nodes[bucket]
nodes_3 = ["node-1", "node-2", "node-3"]
nodes_4 = ["node-1", "node-2", "node-3", "node-4"]

# Check how many keys remap when adding a 4th node
remapped = 0
for i in range(10000):
    key = f"key:{i}"
    if naive_route(key, nodes_3) != naive_route(key, nodes_4):
        remapped += 1

print(f"Remapped: {remapped}/10000 = {remapped/100:.1f}%")
# Remapped: ~7500/10000 = 75% — nearly all keys moved
|
Consistent Hashing with a Virtual Ring#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
| import bisect
import hashlib
class ConsistentHashRing:
    """
    Hash ring using virtual nodes.

    Each physical node gets `replicas` virtual nodes on the ring.
    More virtual nodes = more uniform distribution.
    Typical values: 100-200 replicas.
    """

    def __init__(self, replicas: int = 150):
        self._replicas = replicas
        self._ring: dict[int, str] = {}        # hash → node_name
        self._sorted_hashes: list[int] = []    # sorted keys of _ring, for bisect

    def _hash(self, key: str) -> int:
        # MD5 chosen for uniform distribution, not for security.
        return int(hashlib.md5(key.encode()).hexdigest(), 16)

    def add_node(self, node: str) -> None:
        """Add a node to the ring. Only ~1/N keys need to migrate.

        Idempotent: re-adding a present node is a no-op. (Without this
        guard, duplicate hashes pile up in _sorted_hashes; a later
        remove_node then leaves stale hashes behind and get_node raises
        KeyError on them.)
        """
        if node in self.nodes():
            return
        for i in range(self._replicas):
            virtual_key = self._hash(f"{node}:{i}")
            self._ring[virtual_key] = node
            bisect.insort(self._sorted_hashes, virtual_key)

    def remove_node(self, node: str) -> None:
        """Remove a node. Only its keys migrate to the next node."""
        for i in range(self._replicas):
            virtual_key = self._hash(f"{node}:{i}")
            # Only delete entries this node actually owns, so a (rare)
            # virtual-key hash collision with another node is not clobbered.
            if self._ring.get(virtual_key) == node:
                del self._ring[virtual_key]
                # O(log n) locate + pop instead of list.remove's linear scan.
                idx = bisect.bisect_left(self._sorted_hashes, virtual_key)
                self._sorted_hashes.pop(idx)

    def get_node(self, key: str) -> str:
        """Route a key to its responsible node.

        Raises RuntimeError if the ring is empty.
        """
        if not self._ring:
            raise RuntimeError("No nodes in ring")
        h = self._hash(key)
        # Find the first virtual node clockwise from h; the modulo wraps
        # past the largest hash back to the start of the ring.
        idx = bisect.bisect(self._sorted_hashes, h) % len(self._sorted_hashes)
        return self._ring[self._sorted_hashes[idx]]

    def get_nodes(self, key: str, count: int) -> list[str]:
        """Get `count` distinct physical nodes for replication.

        Walks clockwise from the key's position, skipping virtual nodes
        that map to an already-collected physical node. The first element
        is the key's primary node (same as get_node).

        Raises ValueError if fewer than `count` distinct nodes exist.
        """
        if count > len(self.nodes()):
            raise ValueError("Not enough nodes")
        if count <= 0:
            return []
        h = self._hash(key)
        idx = bisect.bisect(self._sorted_hashes, h)
        nodes_seen: set[str] = set()
        result: list[str] = []
        for i in range(len(self._sorted_hashes)):
            node = self._ring[self._sorted_hashes[(idx + i) % len(self._sorted_hashes)]]
            if node not in nodes_seen:
                nodes_seen.add(node)
                result.append(node)
                if len(result) == count:
                    break
        return result

    def nodes(self) -> set[str]:
        """Return the set of physical nodes currently on the ring."""
        return set(self._ring.values())

    def distribution(self, num_keys: int = 10000) -> dict[str, int]:
        """Show how `num_keys` synthetic keys are distributed across nodes."""
        counts: dict[str, int] = {n: 0 for n in self.nodes()}
        for i in range(num_keys):
            node = self.get_node(f"key:{i}")
            counts[node] += 1
        return counts
|
Demonstration: Minimal Remapping#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
ring = ConsistentHashRing(replicas=150)
for name in ("node-1", "node-2", "node-3"):
    ring.add_node(name)

# Record initial assignments
initial = {}
for i in range(10000):
    key = f"key:{i}"
    initial[key] = ring.get_node(key)

# Add a 4th node
ring.add_node("node-4")

# Count remapped keys
remapped = sum(1 for key, owner in initial.items() if ring.get_node(key) != owner)
print(f"Remapped after adding node-4: {remapped}/10000 = {remapped/100:.1f}%")
# Expected: ~25% (1/4 of keys move to the new node)

# Remove a node
ring.remove_node("node-2")
# Only node-2's keys should move (to node-3 or node-4)
after_removal = {f"key:{i}": ring.get_node(f"key:{i}") for i in range(10000)}
print(f"Distribution: {ring.distribution()}")
|
Replication with Consistent Hashing#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class ReplicatedCache:
    """Consistent hashing with N-way replication."""

    def __init__(self, nodes: list[str], replicas: int = 3):
        self._ring = ConsistentHashRing()
        self._replicas = replicas
        self._connections = {}
        for node in nodes:
            self._ring.add_node(node)
            self._connections[node] = connect(node)

    def set(self, key: str, value: str) -> None:
        """Write to N replica nodes."""
        for node in self._ring.get_nodes(key, self._replicas):
            self._connections[node].set(key, value)

    def get(self, key: str) -> str | None:
        """Read from primary node; fall back to replicas."""
        for node in self._ring.get_nodes(key, self._replicas):
            try:
                return self._connections[node].get(key)
            except ConnectionError:
                # Primary (or earlier replica) unreachable — try the next one.
                continue
        return None

    def add_node(self, node: str) -> None:
        """Connect to and register a new node, then trigger rebalancing."""
        self._connections[node] = connect(node)
        self._ring.add_node(node)
        # Background task: migrate keys that now belong to this node
        self._rebalance(node)

    def _rebalance(self, new_node: str) -> None:
        """Move keys from their old owner to new_node if it's now closer."""
        # In practice: use a rebalancing pipeline
        # Here: simplified illustration
        pass
|
Weighted Nodes (Heterogeneous Hardware)#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class WeightedConsistentHashRing(ConsistentHashRing):
    """Assign more virtual nodes to more powerful servers."""

    def add_node(self, node: str, weight: float = 1.0) -> None:
        """Add `node` with `weight` times the base number of virtual nodes.

        Weight > 1.0 gives the node more responsibility; weight 1.0 matches
        the unweighted ring. Backward compatible: `weight` defaults to 1.0.
        """
        # Clamp to at least one virtual node: with a small weight (or a
        # small base replica count) int() truncation could yield 0 replicas,
        # silently leaving the node off the ring.
        num_replicas = max(1, int(self._replicas * weight))
        for i in range(num_replicas):
            virtual_key = self._hash(f"{node}:{i}")
            self._ring[virtual_key] = node
            bisect.insort(self._sorted_hashes, virtual_key)
# 4-core machine gets 1x weight, 16-core gets 4x
weighted = WeightedConsistentHashRing(replicas=100)
weighted.add_node("small-node", weight=1.0)  # 100 virtual nodes
weighted.add_node("large-node", weight=4.0)  # 400 virtual nodes
# large-node handles ~80% of traffic
|
Consistent Hashing in Practice#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| Redis Cluster:
- 16384 hash slots distributed across nodes
- Each node owns a subset of the slots (often assigned as contiguous ranges, though ownership need not be contiguous)
- Key → CRC16(key) % 16384 → slot → node
- Adding/removing nodes: slot migration
Cassandra:
- Token ring (consistent hashing variant)
- Each node owns a token range
- Virtual nodes (vnodes): each physical node owns many token ranges
- Replication factor N: replicate to next N distinct nodes on ring
DynamoDB:
- Consistent hashing for partition distribution
- Partition key hashed to determine storage node
- Transparent to users (fully managed)
Use cases for custom implementation:
- Distributed in-memory cache (Memcached-like)
- Sharding a non-partitioned database
- Routing to stateful backend services
- Sticky session distribution
|
Conclusion#
Consistent hashing limits cache invalidation and data migration to O(K/N) keys when nodes are added or removed, compared to O(K) for modulo hashing. Virtual nodes are essential — they distribute responsibility evenly when the number of physical nodes is small. Use 100-200 virtual nodes per physical node. Weighted rings allow heterogeneous hardware to carry proportional load. Redis Cluster and Cassandra use variants of this principle under the hood, but understanding it helps you reason about their behavior during scaling events.