SQL Injection: Patterns and Prevention

SQL injection remains the most exploited web application vulnerability despite being well-understood and preventable. It occurs when user-supplied input is included in a SQL query without proper param

Introduction#

SQL injection remains the most exploited web application vulnerability despite being well-understood and preventable. It occurs when user-supplied input is included in a SQL query without proper parameterization. A successful injection can result in data exfiltration, data deletion, authentication bypass, or remote code execution.

How SQL Injection Works#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# VULNERABLE: string interpolation in SQL
def get_user(username: str):
    query = f"SELECT * FROM users WHERE username = '{username}'"
    return db.execute(query)

# Attack: input = "admin' OR '1'='1"
# Resulting query:
# SELECT * FROM users WHERE username = 'admin' OR '1'='1'
# Returns all users — authentication bypassed

# Attack: input = "'; DROP TABLE users; --"
# Resulting query:
# SELECT * FROM users WHERE username = ''; DROP TABLE users; --'
# Deletes the users table

# Attack: UNION-based data extraction
# input = "' UNION SELECT username, password, null FROM users --"
# SELECT * FROM products WHERE id = '' UNION SELECT username, password, null FROM users --

Prevention: Parameterized Queries#

The only reliable prevention is parameterized queries (also called prepared statements). User input is never concatenated into the SQL string.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# SAFE: parameterized query with psycopg2
import psycopg2

def get_user(conn, username: str):
    with conn.cursor() as cur:
        cur.execute(
            "SELECT id, username, email FROM users WHERE username = %s",
            (username,)  # passed as parameter, not interpolated
        )
        return cur.fetchone()

# SAFE: SQLAlchemy ORM
from sqlalchemy.orm import Session
from app.models import User

def get_user(db: Session, username: str) -> User | None:
    return db.query(User).filter(User.username == username).first()

# SAFE: SQLAlchemy Core with text() — must use bindparams
from sqlalchemy import text

def get_user_raw(db: Session, username: str):
    result = db.execute(
        text("SELECT * FROM users WHERE username = :username"),
        {"username": username}
    )
    return result.fetchone()

# UNSAFE: using text() without parameters
def get_user_unsafe(db: Session, username: str):
    # WRONG: f-string in text() is still vulnerable
    result = db.execute(text(f"SELECT * FROM users WHERE username = '{username}'"))
    return result.fetchone()

asyncpg (Python)#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import asyncpg

async def get_user(pool: asyncpg.Pool, username: str):
    # asyncpg uses $1, $2, ... placeholders
    return await pool.fetchrow(
        "SELECT id, username FROM users WHERE username = $1",
        username
    )

async def search_users(pool: asyncpg.Pool, search: str):
    # LIKE with parameterization — escape % and _ in search term
    safe_search = search.replace("%", "\\%").replace("_", "\\_")
    return await pool.fetch(
        "SELECT id, username FROM users WHERE username LIKE $1 ESCAPE '\\'",
        f"%{safe_search}%"
    )

Dynamic Queries: Safe Patterns#

Sometimes query structure itself is dynamic (ORDER BY column, dynamic filters). These require careful handling.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from enum import Enum
from typing import Literal

# SAFE: validate column names against an allowlist
VALID_ORDER_COLUMNS = {"username", "created_at", "email"}
VALID_ORDER_DIRECTIONS: set[Literal["ASC", "DESC"]] = {"ASC", "DESC"}

def get_users_sorted(
    db,
    order_by: str = "created_at",
    direction: str = "DESC",
    limit: int = 50
) -> list:
    if order_by not in VALID_ORDER_COLUMNS:
        raise ValueError(f"Invalid order column: {order_by}")
    if direction not in VALID_ORDER_DIRECTIONS:
        raise ValueError(f"Invalid direction: {direction}")
    if not 1 <= limit <= 100:
        raise ValueError("Limit must be between 1 and 100")

    # Column names cannot be parameterized — use allowlist and format
    query = f"SELECT * FROM users ORDER BY {order_by} {direction} LIMIT %s"
    return db.execute(query, (limit,))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# SAFE: building dynamic WHERE clauses
def filter_orders(
    db,
    user_id: int | None = None,
    status: str | None = None,
    min_total: float | None = None,
) -> list:
    conditions = []
    params = []

    if user_id is not None:
        conditions.append("user_id = %s")
        params.append(user_id)

    if status is not None:
        valid_statuses = {"pending", "completed", "cancelled"}
        if status not in valid_statuses:
            raise ValueError(f"Invalid status: {status}")
        conditions.append("status = %s")
        params.append(status)

    if min_total is not None:
        conditions.append("total >= %s")
        params.append(min_total)

    where_clause = " AND ".join(conditions) if conditions else "TRUE"
    query = f"SELECT * FROM orders WHERE {where_clause}"
    return db.execute(query, params)

ORM vs Raw SQL#

ORMs use parameterized queries by default, but raw SQL is sometimes necessary. Always use the ORM’s parameterization mechanism.

1
2
3
4
5
6
7
8
9
10
# SQLAlchemy: SAFE ORM filter
users = db.query(User).filter(
    User.username == username,  # parameterized automatically
    User.status.in_(["active", "trial"])
).all()

# SQLAlchemy: UNSAFE — never do this
users = db.execute(
    text(f"SELECT * FROM users WHERE username = '{username}'")  # UNSAFE
)

Defense in Depth#

Parameterized queries are the primary prevention. Add these layers:

1
2
3
4
5
6
7
8
9
-- Least-privilege database user: read-only where possible
CREATE USER api_readonly WITH PASSWORD 'xxx';
GRANT SELECT ON users, orders TO api_readonly;
-- Cannot DROP TABLE even if injected

-- Separate users per function
CREATE USER api_writer WITH PASSWORD 'yyy';
GRANT SELECT, INSERT, UPDATE ON orders TO api_writer;
-- Cannot DELETE
1
2
3
4
5
6
# Input length validation at API layer
from pydantic import BaseModel, Field

class SearchRequest(BaseModel):
    query: str = Field(max_length=200)  # limit input length
    page: int = Field(ge=1, le=1000)

Conclusion#

SQL injection is prevented by one thing: parameterized queries. Never interpolate user input into SQL strings. Use %s or $1 placeholders with all database drivers. For dynamic column names and ORDER BY, validate against an explicit allowlist. ORMs are safer by default but can still be misused with raw SQL. Add a least-privilege database user as a second line of defense.

Contents