Python Logging Best Practices

Poor logging is one of the most common impediments to diagnosing production issues. Logs that are too verbose obscure signal in noise; logs that are too sparse leave you guessing. Unstructured logs are difficult to query.

Introduction#

Poor logging is one of the most common impediments to diagnosing production issues. Logs that are too verbose obscure signal in noise; logs that are too sparse leave you guessing. Unstructured logs are difficult to query. This post covers Python logging configuration and patterns for production systems.

Structured Logging#

Plain text logs are difficult to query at scale. Structured logs (JSON) integrate with log aggregators (Elasticsearch, Loki, CloudWatch Logs Insights).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import logging
import json
import sys
import traceback
from datetime import datetime, timezone
from typing import Any

class JSONFormatter(logging.Formatter):
    """Render each LogRecord as a single-line JSON object.

    Emits a fixed envelope (timestamp, level, logger, message, source
    location), the formatted traceback when an exception is attached,
    and any fields the caller passed via ``extra={...}``.
    """

    # Attribute names present on every freshly-constructed LogRecord
    # (plus the two names Formatter itself may add). Anything on the
    # record NOT in this set was injected via ``extra=`` and should be
    # emitted as a structured field.
    #
    # BUG FIX: the original tested ``key not in logging.LogRecord.__dict__``,
    # which is the *class* dict (methods such as ``getMessage``) and contains
    # none of the standard instance attribute names — so every built-in
    # attribute (msg, args, pathname, created, ...) leaked into the output.
    _RESERVED = frozenset(
        logging.LogRecord("", 0, "", 0, "", (), None).__dict__
    ) | {"message", "asctime"}

    def format(self, record: logging.LogRecord) -> str:
        log_entry: dict[str, Any] = {
            # Use the record's own creation time (not format time) so the
            # timestamp is accurate even if formatting is deferred/queued.
            "timestamp": datetime.fromtimestamp(
                record.created, timezone.utc
            ).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        # Include exception traceback if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Include only the extra fields added by the caller
        for key, value in record.__dict__.items():
            if key not in self._RESERVED and not key.startswith("_"):
                log_entry[key] = value

        # default=str is deliberate: a non-serializable extra must never
        # crash the logging pipeline; it degrades to its str() form.
        return json.dumps(log_entry, default=str)

def configure_logging(level: str = "INFO") -> None:
    """Install a JSON stdout handler on the root logger at *level*.

    Replaces any previously installed root handlers and quiets chatty
    third-party loggers down to WARNING.
    """
    json_handler = logging.StreamHandler(sys.stdout)
    json_handler.setFormatter(JSONFormatter())

    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, level.upper()))
    root_logger.handlers = [json_handler]

    # These libraries log heavily at INFO/DEBUG; keep only warnings.
    for noisy_logger in ("urllib3", "botocore"):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)

Using structlog#

structlog is the de facto standard third-party library for structured logging in Python.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import structlog
import logging

# Configure structlog once at process startup.
# NOTE: processor order matters — each processor transforms the event dict
# and the final renderer must come last.
structlog.configure(
    processors=[
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.ExceptionRenderer(),
        structlog.processors.JSONRenderer(),
    ],
    # Filtering at the wrapper level drops sub-INFO calls cheaply,
    # before any processors run.
    wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
    context_class=dict,
    # PrintLoggerFactory writes the rendered line to stdout, bypassing
    # the stdlib logging machinery entirely.
    logger_factory=structlog.PrintLoggerFactory(),
)

log = structlog.get_logger()

# Basic usage: the first positional argument becomes the "event" key;
# keyword arguments become structured fields.
log.info("order_created", order_id=123, user_id=42, total=99.99)
# {"event": "order_created", "order_id": 123, "user_id": 42, "total": 99.99, ...}

log.error("payment_failed", order_id=123, error_code="CARD_DECLINED")

Request Context Binding#

Bind request context (trace ID, user ID) once per request; it is automatically included in all subsequent log calls.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import structlog
from fastapi import FastAPI, Request
import uuid

app = FastAPI()

@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Per-request logging middleware.

    Binds request-scoped context (request id, user id, method, path) so
    every log line emitted while handling this request carries it, then
    logs start, completion, and failure.
    """
    # Honor an upstream X-Request-ID header; fall back to a fresh UUID.
    rid = request.headers.get("X-Request-ID") or str(uuid.uuid4())
    # NOTE(review): assumes an auth layer populated request.state.user_id
    # earlier in the stack — None otherwise.
    uid = getattr(request.state, "user_id", None)

    structlog.contextvars.bind_contextvars(
        request_id=rid,
        user_id=uid,
        method=request.method,
        path=request.url.path,
    )

    logger = structlog.get_logger()
    logger.info("request_started")

    try:
        response = await call_next(request)
    except Exception as exc:
        # Log with traceback, then let the framework's handlers see it.
        logger.error("request_failed", error=str(exc), exc_info=True)
        raise
    else:
        logger.info("request_completed", status_code=response.status_code)
        return response
    finally:
        # Contextvars are task-scoped but clear anyway so nothing leaks
        # into a reused context.
        structlog.contextvars.clear_contextvars()

Log Levels: When to Use Each#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
log = structlog.get_logger()

# DEBUG: detailed diagnostic info — disabled in production
log.debug("cache_lookup", key="user:42", hit=False)

# INFO: normal operations worth recording
log.info("user_logged_in", user_id=42)
log.info("payment_processed", order_id=123, amount=99.99)

# WARNING: unexpected but handled situation — no action failed yet
log.warning("rate_limit_approaching", user_id=42, requests_remaining=5)
log.warning("config_value_missing", key="FEATURE_FLAG_X", using_default=True)

# ERROR: operation failed, requires attention
# (exc_info=True attaches the active traceback — only meaningful when
# called from inside an except block)
log.error("payment_failed", order_id=123, error="CARD_DECLINED", exc_info=True)

# CRITICAL: system is unusable
log.critical("database_unavailable", retries=5)

What to Log#

Always log:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# External calls (with duration)
import time

# time.monotonic() is immune to wall-clock adjustments (NTP, DST),
# so it is the right clock for measuring durations.
start = time.monotonic()
result = await external_api.call()
duration_ms = (time.monotonic() - start) * 1000
log.info("external_api_called", service="payment", duration_ms=round(duration_ms, 2))

# State transitions — log both sides so the change is reconstructable
log.info("order_status_changed", order_id=123, from_status="pending", to_status="completed")

# Errors with context: log identifiers plus traceback, then re-raise so
# the caller still sees the failure
try:
    process_order(order)
except Exception:
    log.error("order_processing_failed", order_id=order.id, exc_info=True)
    raise

Never log:

1
2
3
4
5
6
7
8
# NEVER log sensitive data — logs are copied to aggregators and backups,
# and anyone with log access can read them
log.info("user_authenticated", password=password)        # WRONG
log.info("card_charged", card_number=card.number)       # WRONG
log.info("api_call", authorization_header=headers["Authorization"])  # WRONG

# Safe: log only non-sensitive identifiers
log.info("user_authenticated", user_id=user.id)
log.info("card_charged", card_last_four=card.number[-4:])

Log Sampling for High-Volume Events#

1
2
3
4
5
6
7
8
9
import random

def should_sample(rate: float = 0.01) -> bool:
    """Return True for roughly ``rate`` fraction of calls (0.01 = 1%).

    Uses the non-cryptographic ``random`` module, which is fine for
    log sampling.
    """
    return rate > random.random()

# For very high-frequency events, sample to avoid log volume;
# aggregate metrics (counters) should cover the unsampled remainder
if should_sample(0.001):  # 0.1% of requests
    log.debug("cache_hit", key=key)

Correlation with Distributed Tracing#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from opentelemetry import trace

log = structlog.get_logger()

def get_trace_context() -> dict:
    """Return the active OpenTelemetry trace/span ids as hex strings.

    Returns an empty dict when no span is recording, so binding the
    result is a harmless no-op outside of a trace.
    """
    current_span = trace.get_current_span()
    if not current_span.is_recording():
        return {}
    span_ctx = current_span.get_span_context()
    # W3C trace-context widths: 128-bit trace id, 64-bit span id,
    # zero-padded lowercase hex.
    return {
        "trace_id": format(span_ctx.trace_id, "032x"),
        "span_id": format(span_ctx.span_id, "016x"),
    }

# Attach trace ids to every subsequent log line in this context
structlog.contextvars.bind_contextvars(**get_trace_context())

Conclusion#

Use structured (JSON) logging with a library like structlog. Bind request context once per request — trace ID, user ID, request path. Use log levels correctly: INFO for normal operations, ERROR for failures, DEBUG for development. Never log passwords, tokens, or PII. Sample high-frequency debug logs rather than disabling them entirely.

Contents