Introduction#
Poor logging is one of the most common impediments to diagnosing production issues. Logs that are too verbose obscure signal in noise; logs that are too sparse leave you guessing. Unstructured logs are difficult to query. This post covers Python logging configuration and patterns for production systems.
Structured Logging#
Plain text logs are difficult to query at scale. Structured logs (JSON) integrate with log aggregators (Elasticsearch, Loki, CloudWatch Logs Insights).
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
| import logging
import json
import sys
import traceback
from datetime import datetime, timezone
from typing import Any
class JSONFormatter(logging.Formatter):
    """Formatter that renders each log record as one JSON object per line.

    Emits a fixed envelope (timestamp, level, logger, message, source
    location) plus any user-supplied fields passed via ``extra=``.
    """

    # Attribute names present on every LogRecord *instance*.  Anything in
    # record.__dict__ that is NOT in this set must have arrived through
    # ``extra=`` and should be surfaced in the JSON output.
    # NOTE: checking against ``logging.LogRecord.__dict__`` (the class
    # dict) is wrong — it holds methods like ``getMessage``, not instance
    # attributes such as ``msg`` and ``args``, so every standard record
    # attribute would leak into the payload.
    _RESERVED = frozenset(
        logging.LogRecord("", 0, "", 0, "", (), None).__dict__
    ) | {"message", "asctime"}

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialized as a single JSON line."""
        log_entry: dict[str, Any] = {
            # Use the record's own creation time so the timestamp is not
            # skewed by formatting latency (e.g. with queued handlers).
            "timestamp": datetime.fromtimestamp(
                record.created, tz=timezone.utc
            ).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }
        # Include exception traceback if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)
        # Include extra fields added by the logger (via ``extra=``)
        for key, value in record.__dict__.items():
            if key not in self._RESERVED and not key.startswith("_"):
                log_entry[key] = value
        # ``default=str`` keeps logging robust when an extra field is not
        # natively JSON-serializable (e.g. Decimal, datetime, UUID).
        return json.dumps(log_entry, default=str)
def configure_logging(level: str = "INFO") -> None:
    """Install a JSON handler on the root logger at *level*.

    Replaces any existing root handlers so output goes to stdout only,
    and quiets chatty third-party loggers down to WARNING.
    """
    json_handler = logging.StreamHandler(sys.stdout)
    json_handler.setFormatter(JSONFormatter())

    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, level.upper()))
    root_logger.handlers = [json_handler]

    # Silence noisy third-party libraries
    for noisy in ("urllib3", "botocore"):
        logging.getLogger(noisy).setLevel(logging.WARNING)
|
Using structlog#
structlog is the de facto standard third-party library for structured logging in Python (it is not part of the standard library; install it with pip).
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
| import structlog
import logging
# Configure structlog once at process start-up; processors run in order
# on every event dict before the final renderer emits it.
structlog.configure(
    processors=[
        # Fold in context bound via structlog.contextvars (request IDs etc.)
        structlog.contextvars.merge_contextvars,
        # Add the log level under the "level" key
        structlog.processors.add_log_level,
        # ISO-8601 timestamp under "timestamp"
        structlog.processors.TimeStamper(fmt="iso"),
        # Render stack info when stack_info=True is passed to a log call
        structlog.processors.StackInfoRenderer(),
        # Render exception info (exc_info) into the event dict
        structlog.processors.ExceptionRenderer(),
        # Final step: serialize the event dict as one JSON line
        structlog.processors.JSONRenderer(),
    ],
    # Filter by level in the bound logger itself (fast, no stdlib routing)
    wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
    context_class=dict,
    # Write rendered lines straight to stdout
    logger_factory=structlog.PrintLoggerFactory(),
)

log = structlog.get_logger()

# Basic usage: the first positional argument is the event name; keyword
# arguments become structured fields in the output.
log.info("order_created", order_id=123, user_id=42, total=99.99)
# {"event": "order_created", "order_id": 123, "user_id": 42, "total": 99.99, ...}

log.error("payment_failed", order_id=123, error_code="CARD_DECLINED")
|
Request Context Binding#
Bind request context (trace ID, user ID) once per request; it is automatically included in all subsequent log calls.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
| import structlog
from fastapi import FastAPI, Request
import uuid
app = FastAPI()


@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Bind request-scoped log context, log start/end, and always clean up."""
    # Honor an upstream X-Request-ID header; otherwise mint a fresh one.
    incoming = request.headers.get("X-Request-ID")

    # Bind context for all logs in this request
    structlog.contextvars.bind_contextvars(
        request_id=incoming or str(uuid.uuid4()),
        user_id=getattr(request.state, "user_id", None),
        method=request.method,
        path=request.url.path,
    )

    logger = structlog.get_logger()
    logger.info("request_started")
    try:
        response = await call_next(request)
    except Exception as exc:
        logger.error("request_failed", error=str(exc), exc_info=True)
        raise
    else:
        logger.info("request_completed", status_code=response.status_code)
        return response
    finally:
        # Drop bound context so it never leaks into the next request.
        structlog.contextvars.clear_contextvars()
|
Log Levels: When to Use Each#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
log = structlog.get_logger()

# DEBUG: detailed diagnostic info — disabled in production
log.debug("cache_lookup", key="user:42", hit=False)

# INFO: normal operations worth recording
log.info("user_logged_in", user_id=42)
log.info("payment_processed", order_id=123, amount=99.99)

# WARNING: unexpected but handled situation (no action needed yet)
log.warning("rate_limit_approaching", user_id=42, requests_remaining=5)
log.warning("config_value_missing", key="FEATURE_FLAG_X", using_default=True)

# ERROR: operation failed, requires attention
log.error("payment_failed", order_id=123, error="CARD_DECLINED", exc_info=True)

# CRITICAL: system is unusable
log.critical("database_unavailable", retries=5)
|
What to Log#
Always log:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# External calls (with duration)
import time

start = time.monotonic()  # monotonic clock: immune to wall-clock adjustments
result = await external_api.call()
duration_ms = (time.monotonic() - start) * 1000
log.info("external_api_called", service="payment", duration_ms=round(duration_ms, 2))

# State transitions
log.info("order_status_changed", order_id=123, from_status="pending", to_status="completed")

# Errors with context
try:
    process_order(order)
except Exception:
    # Log the identifying context plus full traceback, then re-raise so
    # the caller still observes the failure.
    log.error("order_processing_failed", order_id=order.id, exc_info=True)
    raise
|
Never log:
1
2
3
4
5
6
7
8
# NEVER log sensitive data (the WRONG lines below are deliberate anti-examples)
log.info("user_authenticated", password=password)  # WRONG
log.info("card_charged", card_number=card.number)  # WRONG
log.info("api_call", authorization_header=headers["Authorization"])  # WRONG

# Safe: log only non-sensitive identifiers
log.info("user_authenticated", user_id=user.id)
log.info("card_charged", card_last_four=card.number[-4:])
|
Log Sampling for High-Volume Events#
1
2
3
4
5
6
7
8
9
| import random
def should_sample(rate: float = 0.01) -> bool:
    """Decide whether to emit a sampled event.

    *rate* is the fraction of calls that return True (0.01 = log 1% of events).
    """
    roll = random.random()
    return roll < rate
# For very high-frequency events, sample to avoid log volume
# (sampling keeps a representative trickle instead of disabling the event)
if should_sample(0.001):  # 0.1% of requests
    log.debug("cache_hit", key=key)
|
Correlation with Distributed Tracing#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| from opentelemetry import trace
log = structlog.get_logger()
def get_trace_context() -> dict:
    """Return the active span's trace/span IDs, or {} when nothing is recording."""
    current_span = trace.get_current_span()
    if not current_span.is_recording():
        return {}
    span_ctx = current_span.get_span_context()
    # Hex widths follow W3C Trace Context: 128-bit trace id, 64-bit span id.
    return {
        "trace_id": f"{span_ctx.trace_id:032x}",
        "span_id": f"{span_ctx.span_id:016x}",
    }
# Bind trace context to all logs
# NOTE(review): this binds the span IDs captured at *this* moment; for
# per-request correlation, call it inside request handling (e.g. the
# middleware above) so each request binds its own trace context — confirm.
structlog.contextvars.bind_contextvars(**get_trace_context())
|
Conclusion#
Use structured (JSON) logging with a library like structlog. Bind request context once per request — trace ID, user ID, request path. Use log levels correctly: INFO for normal operations, ERROR for failures, DEBUG for development. Never log passwords, tokens, or PII. Sample high-frequency debug logs rather than disabling them entirely.