Table of contents

How do I log requests and responses with urllib3?

Logging HTTP requests and responses in urllib3 is essential for debugging network issues, monitoring API calls, and understanding application behavior. Python's built-in logging module provides the foundation for capturing urllib3's internal logging information.

Basic Logging Setup

Quick Start Example

import logging
import urllib3

# Enable debug logging for urllib3.
# basicConfig configures the root logger; urllib3's internal loggers
# propagate to it, so their DEBUG records become visible.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Create PoolManager and make request
http = urllib3.PoolManager()
response = http.request('GET', 'https://httpbin.org/get')
print(f"Status: {response.status}")

Configuring Specific urllib3 Loggers

urllib3 uses multiple internal loggers for different components:

import logging
import urllib3

# Configure the root logger (handlers + default INFO level)
logging.basicConfig(level=logging.INFO)

# Configure specific urllib3 loggers
loggers = [
    'urllib3.connectionpool',     # Connection pooling details
    'urllib3.poolmanager',        # Pool manager operations
    'urllib3.util.retry',         # Retry attempts
]

# Lower each component logger to DEBUG so its records pass through
# to the root handler configured above.
for logger_name in loggers:
    logging.getLogger(logger_name).setLevel(logging.DEBUG)

# Disable SSL warnings if needed
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

http = urllib3.PoolManager()
response = http.request('GET', 'https://httpbin.org/json')

Custom Logging Configuration

File-Based Logging

import logging
import urllib3
from datetime import datetime

# Create custom logger
logger = logging.getLogger('urllib3_requests')
logger.setLevel(logging.DEBUG)

# Create file handler with timestamp
log_filename = f"urllib3_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
file_handler = logging.FileHandler(log_filename)
file_handler.setLevel(logging.DEBUG)

# Create console handler for important messages
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

# Create formatters
file_formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
console_formatter = logging.Formatter('%(levelname)s - %(message)s')

file_handler.setFormatter(file_formatter)
console_handler.setFormatter(console_formatter)

# Add handlers to logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Raise the 'urllib3' logger's level so its internal records are emitted.
# NOTE(review): this does NOT route urllib3's records through the custom
# 'urllib3_requests' logger above — they propagate to the root logger.
logging.getLogger('urllib3').setLevel(logging.DEBUG)

# Make requests
http = urllib3.PoolManager()
logger.info("Starting HTTP requests")

for url in ['https://httpbin.org/get', 'https://httpbin.org/json']:
    logger.info(f"Requesting: {url}")
    response = http.request('GET', url)
    logger.info(f"Response status: {response.status}")

Selective Logging with Filters

import logging
import urllib3

class UrllibFilter(logging.Filter):
    """Filter that trims urllib3 log output.

    Connection/request-related messages on the allow-list are always
    kept; verbose SSL/TLS chatter is dropped; any other message passes
    through unchanged.
    """

    # Substrings that are always logged, even if SSL/TLS appears too.
    _ALWAYS_KEEP = (
        'Starting new HTTPS connection',
        'GET /',
        'Response status:',
    )

    def filter(self, record):
        text = record.getMessage()
        # Connection- and request-related messages are kept unconditionally.
        if any(marker in text for marker in self._ALWAYS_KEEP):
            return True
        # Drop verbose SSL/TLS handshake noise.
        if 'SSL' in text or 'TLS' in text:
            return False
        # Everything else is allowed through.
        return True

# Setup logging with custom filter
logging.basicConfig(level=logging.DEBUG)
urllib3_logger = logging.getLogger('urllib3')
# NOTE(review): a filter attached to a logger applies only to records
# logged directly on that logger, not to children such as
# 'urllib3.connectionpool'; attach it to a handler to cover all — confirm.
urllib3_logger.addFilter(UrllibFilter())

http = urllib3.PoolManager()
response = http.request('GET', 'https://httpbin.org/headers')

Advanced Logging Scenarios

Request/Response Interceptor

import logging
import urllib3
import json

class RequestResponseLogger:
    """Wrapper around a urllib3 PoolManager that logs every request/response.

    The wrapped interface is unchanged: call ``request(method, url,
    **kwargs)`` exactly as on the pool itself; the underlying response
    object is returned unmodified.
    """

    def __init__(self, pool_manager):
        self.pool = pool_manager
        self.logger = logging.getLogger('request_response')
        self.logger.setLevel(logging.INFO)

        # Attach a handler only once per named logger: without this guard,
        # every new wrapper instance would add another StreamHandler and
        # each message would be printed multiple times.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def request(self, method, url, **kwargs):
        """Log the outgoing request, delegate to the pool, log the response."""
        # Log request details
        self.logger.info(f"→ {method} {url}")
        if 'headers' in kwargs:
            self.logger.info(f"  Headers: {dict(kwargs['headers'])}")
        if kwargs.get('body'):
            self.logger.info(f"  Body: {kwargs['body']}")

        # Make actual request
        response = self.pool.request(method, url, **kwargs)

        # Log response details
        self.logger.info(f"← {response.status} {response.reason}")
        self.logger.info(f"  Response headers: {dict(response.headers)}")

        # Log response body (be careful with large responses)
        if response.data:
            try:
                body = response.data.decode('utf-8')
                if len(body) > 500:
                    body = body[:500] + "... (truncated)"
                self.logger.info(f"  Response body: {body}")
            except UnicodeDecodeError:
                self.logger.info(f"  Response body: <binary data, {len(response.data)} bytes>")

        return response

# Usage: wrap a real PoolManager in the logging interceptor
http = urllib3.PoolManager()
logged_http = RequestResponseLogger(http)

# Make requests through the logger — same signature as PoolManager.request
response = logged_http.request('POST', 'https://httpbin.org/post', 
                              body=json.dumps({'key': 'value'}),
                              headers={'Content-Type': 'application/json'})

Environment-Specific Configuration

import logging
import urllib3
import os

def setup_urllib3_logging():
    """Configure urllib3 logging based on environment.

    Reads the ENVIRONMENT variable ('production', 'development' or
    'testing'; defaults to 'development') and adjusts the 'urllib3'
    logger accordingly. Safe to call more than once: the development
    file handler is only attached if one is not already present.
    """

    env = os.getenv('ENVIRONMENT', 'development')
    urllib3_logger = logging.getLogger('urllib3')

    if env == 'production':
        # Minimal logging in production
        urllib3_logger.setLevel(logging.WARNING)

    elif env == 'development':
        # Verbose logging in development
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

        # Log to file in development. Guard against repeated calls:
        # without it each call stacks another FileHandler and every
        # message is written to the file multiple times.
        if not any(isinstance(h, logging.FileHandler)
                   for h in urllib3_logger.handlers):
            handler = logging.FileHandler('development.log')
            handler.setLevel(logging.DEBUG)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            urllib3_logger.addHandler(handler)

    elif env == 'testing':
        # Capture logs for test analysis
        urllib3_logger.setLevel(logging.INFO)

# Setup based on environment (reads ENVIRONMENT variable)
setup_urllib3_logging()

http = urllib3.PoolManager()
response = http.request('GET', 'https://httpbin.org/get')

Security Considerations

Important: When logging HTTP requests and responses, be extremely careful about sensitive information:

import logging
import urllib3
import re

class SecureLogger(logging.Filter):
    """Filter that redacts sensitive information from log records.

    Matches are replaced with '[REDACTED]' before the record reaches any
    handler. The record itself is mutated, so the redaction applies to
    every downstream handler. The filter never drops a record.
    """

    # Human-readable source patterns, kept as strings for introspection
    # and backward compatibility.
    SENSITIVE_PATTERNS = [
        r'Authorization: Bearer [^\s]+',
        r'Authorization: Basic [^\s]+',
        r'password[\'\"]\s*:\s*[\'\"]*[^\'\"]*[\'\"]*',
        r'api_key[\'\"]\s*:\s*[\'\"]*[^\'\"]*[\'\"]*',
        r'token[\'\"]\s*:\s*[\'\"]*[^\'\"]*[\'\"]*',
    ]

    # Compile once at class-creation time instead of recompiling with
    # flags on every log record.
    _COMPILED = [re.compile(p, re.IGNORECASE) for p in SENSITIVE_PATTERNS]

    def filter(self, record):
        message = record.getMessage()

        redacted = message
        for pattern in self._COMPILED:
            redacted = pattern.sub('[REDACTED]', redacted)

        if redacted != message:
            # Replace the formatted message and drop the original args so
            # handlers cannot re-interpolate the sensitive values.
            record.msg = redacted
            record.args = ()

        return True

# Apply secure logging
logging.basicConfig(level=logging.DEBUG)
urllib3_logger = logging.getLogger('urllib3')
# NOTE(review): filters added to a logger apply only to records logged
# directly on that logger, not to child loggers — attach the filter to a
# handler to redact everything; confirm for your logger hierarchy.
urllib3_logger.addFilter(SecureLogger())

# Safe to log requests with sensitive headers
http = urllib3.PoolManager()
response = http.request('GET', 'https://httpbin.org/bearer', 
                       headers={'Authorization': 'Bearer secret-token'})

Best Practices

  1. Production vs Development: Use different logging levels based on environment
  2. Log Rotation: Implement log rotation to prevent disk space issues
  3. Sensitive Data: Always filter or redact sensitive information
  4. Performance: Be aware that DEBUG logging can impact performance
  5. Storage: Consider log storage and retention policies

Remember that enabling DEBUG-level logging will generate extensive output and may expose sensitive information. Use appropriate log levels and filtering for your specific use case.

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"

Try in request builder

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon