What is Connection Pooling?
Connection pooling in urllib3
is a performance optimization technique that reuses existing TCP connections instead of creating new ones for each HTTP request. This significantly reduces connection overhead, especially when making multiple requests to the same host.
Basic Connection Pooling with PoolManager
PoolManager
is the recommended approach for most use cases. It automatically manages connection pools for multiple hosts.
import urllib3

# A single PoolManager transparently maintains one connection pool per host
# and reuses TCP connections across requests.
http = urllib3.PoolManager()

# Three requests; the two httpbin.org calls share one pooled connection.
resp_get = http.request('GET', 'https://httpbin.org/get')
resp_post = http.request('POST', 'https://httpbin.org/post', fields={'key': 'value'})
resp_github = http.request('GET', 'https://api.github.com/users/octocat')

print(f"Status codes: {resp_get.status}, {resp_post.status}, {resp_github.status}")
Configuring PoolManager Parameters
Customize PoolManager
to optimize performance for your specific needs:
from urllib3 import PoolManager, Retry, Timeout
# Fix: the original caught `urllib3.exceptions.MaxRetryError`, but the name
# `urllib3` was never imported (only a from-import was used), so the except
# clause itself would raise NameError once an exception occurred.
from urllib3.exceptions import MaxRetryError

# Advanced PoolManager configuration
http = PoolManager(
    num_pools=10,   # Max number of connection pools (different hosts)
    maxsize=20,     # Max connections per pool
    block=False,    # Don't block when pool is full, create new connection
    retries=Retry(
        total=3,              # Total retry attempts
        backoff_factor=0.3,   # Wait time between retries
        status_forcelist=[500, 502, 503, 504],  # HTTP status codes to retry
    ),
    timeout=Timeout(
        connect=5.0,  # Connection timeout (seconds)
        read=30.0,    # Read timeout (seconds)
    ),
    headers={'User-Agent': 'MyApp/1.0'},  # Default headers for every request
)

# Use the configured pool manager; MaxRetryError is raised once all
# configured retries are exhausted.
try:
    response = http.request('GET', 'https://httpbin.org/delay/2')
    print(f"Response: {response.status}")
except MaxRetryError as e:
    print(f"Request failed after retries: {e}")
Single-Host Connection Pooling
For applications that primarily communicate with one host, use HTTPConnectionPool
or HTTPSConnectionPool
directly:
from urllib3 import HTTPSConnectionPool

# A dedicated pool for one host keeps every request on reused sockets.
pool = HTTPSConnectionPool(
    host='api.example.com',
    port=443,
    maxsize=10,      # Max connections held in this pool
    timeout=30.0,
    retries=3,
    headers={'Authorization': 'Bearer your-token'},
)

# Issue several requests through the same pool and collect status codes.
responses = []
for endpoint in ('/users', '/posts', '/comments'):
    try:
        reply = pool.request('GET', endpoint)
    except Exception as e:
        print(f"Error requesting {endpoint}: {e}")
    else:
        responses.append(reply.status)
        print(f"GET {endpoint}: {reply.status}")

print(f"All responses: {responses}")
Thread Safety and Concurrent Requests
urllib3
pools are thread-safe, making them suitable for concurrent applications:
import urllib3
import threading
import time

# One PoolManager shared by every worker; urllib3 pools are thread-safe.
http = urllib3.PoolManager(maxsize=50)

def make_request(url, request_id):
    """Fetch *url* once and print its status with the elapsed wall time."""
    started = time.time()
    try:
        reply = http.request('GET', url)
    except Exception as e:
        print(f"Request {request_id} failed: {e}")
    else:
        print(f"Request {request_id}: {reply.status} in {time.time() - started:.2f}s")

# Start each worker as soon as it is created so the requests overlap.
threads = []
for request_id, url in enumerate(['https://httpbin.org/delay/1'] * 10, start=1):
    worker = threading.Thread(target=make_request, args=(url, request_id))
    threads.append(worker)
    worker.start()

# Block until every worker has finished.
for worker in threads:
    worker.join()
Monitoring Pool Statistics
Track connection pool usage for performance optimization:
import urllib3

# Pool manager deliberately sized small so pool reuse is visible.
http = urllib3.PoolManager(num_pools=5, maxsize=10)

# Make some requests (cycles through status endpoints 200-204).
for i in range(20):
    http.request('GET', f'https://httpbin.org/status/{200 + i % 5}')

# Inspect per-host pools. NOTE(review): `http.pools`, `num_connections` and
# `num_requests` are internal implementation details, not public urllib3 API;
# they may be absent or renamed in other urllib3 versions, so read them
# defensively with getattr() instead of assuming they exist.
print("Pool manager pools:")
for key, pool in http.pools.items():
    conns = getattr(pool, 'num_connections', 'n/a')
    reqs = getattr(pool, 'num_requests', 'n/a')
    print(f" {key}: {conns} connections, {reqs} requests")
Best Practices for Pool Management
1. Proper Resource Cleanup
import urllib3
import atexit

# Shared pool manager for the process.
http = urllib3.PoolManager()

def cleanup_pools():
    """Release all pooled connections; runs at interpreter shutdown."""
    print("Cleaning up connection pools...")
    http.clear()

# Register the shutdown hook once, right after defining it.
atexit.register(cleanup_pools)

class PoolManagerContext:
    """Context manager that owns a PoolManager and clears its pools on exit."""

    def __init__(self, **kwargs):
        self.pool_manager = urllib3.PoolManager(**kwargs)

    def __enter__(self):
        return self.pool_manager

    def __exit__(self, *exc_info):
        # Always release connections; a falsy return lets any in-flight
        # exception propagate to the caller.
        self.pool_manager.clear()
        return False

# Usage with the context manager: pools are cleared when the block exits.
with PoolManagerContext(maxsize=20) as http:
    response = http.request('GET', 'https://httpbin.org/get')
    print(f"Response status: {response.status}")
2. Environment-Specific Configuration
import os
import urllib3

def create_optimized_pool():
    """Build a PoolManager tuned for the current environment.

    Development (ENV=development) gets a small pool with short timeouts;
    anything else gets production settings: many pools, high concurrency,
    and retry handling for transient failures.
    """
    if os.getenv('ENV') == 'development':
        settings = dict(
            num_pools=5,
            maxsize=10,
            timeout=urllib3.Timeout(connect=5.0, read=30.0),
        )
    else:
        settings = dict(
            num_pools=50,
            maxsize=100,
            block=False,
            retries=urllib3.Retry(
                total=5,
                backoff_factor=0.5,
                status_forcelist=[500, 502, 503, 504, 429],
            ),
            timeout=urllib3.Timeout(connect=10.0, read=60.0),
        )
    return urllib3.PoolManager(**settings)

# Use environment-optimized pool
http = create_optimized_pool()
3. Error Handling and Monitoring
import urllib3
import logging
# Configure logging
# Root logger at INFO so the per-request log lines emitted below are visible.
logging.basicConfig(level=logging.INFO)
# Conventional module-level logger named after the current module.
logger = logging.getLogger(__name__)
class MonitoredPoolManager:
    """Thin wrapper around PoolManager that counts requests and errors."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded to the underlying PoolManager.
        self.pool_manager = urllib3.PoolManager(**kwargs)
        self.request_count = 0  # total attempts, successful or not
        self.error_count = 0    # attempts that raised

    def request(self, method, url, **kwargs):
        """Proxy to PoolManager.request, logging and counting the outcome."""
        self.request_count += 1
        try:
            result = self.pool_manager.request(method, url, **kwargs)
        except Exception as e:
            self.error_count += 1
            logger.error(f"{method} {url} failed: {e}")
            raise
        logger.info(f"{method} {url}: {result.status}")
        return result

    def get_stats(self):
        """Return cumulative counters plus the derived error rate."""
        total = self.request_count
        return {
            'total_requests': total,
            'total_errors': self.error_count,
            # max(total, 1) avoids ZeroDivisionError before any request.
            'error_rate': self.error_count / max(total, 1),
        }

    def clear(self):
        """Release every pooled connection held by the wrapped manager."""
        self.pool_manager.clear()
# Usage
http = MonitoredPoolManager(maxsize=20)

# Make requests: the first 8 hit a 200 endpoint, the last 2 a 500 endpoint.
for i in range(10):
    try:
        http.request('GET', f'https://httpbin.org/status/{200 if i < 8 else 500}')
    except Exception:
        # Failures are expected here and are already counted and logged by
        # MonitoredPoolManager. Fix: narrowed the original bare `except:`,
        # which would also have swallowed KeyboardInterrupt and SystemExit.
        pass

# Check statistics
stats = http.get_stats()
print(f"Statistics: {stats}")

# Cleanup
http.clear()
Performance Considerations
- Pool Size: Set `maxsize` based on expected concurrent requests
- Number of Pools: Use `num_pools` for applications accessing many different hosts
- Timeouts: Configure appropriate connection and read timeouts
- Retries: Implement retry logic for transient failures
- Keep-Alive: Connection pooling automatically handles HTTP keep-alive
Connection pooling in urllib3
significantly improves performance by reusing TCP connections. Choose PoolManager
for multi-host applications or HTTPConnectionPool
for single-host scenarios, and always configure pools based on your specific requirements and environment.