How do I implement retry logic for failed requests in Python?
Implementing retry logic for failed HTTP requests is crucial for building resilient web scraping and API integration applications. Network failures, temporary server issues, and rate limiting are common challenges that can be mitigated with proper retry mechanisms. This guide covers various approaches to implement retry logic in Python, from simple loops to sophisticated libraries.
Why Implement Retry Logic?
Network requests can fail for various reasons: - Temporary network connectivity issues - Server overload or temporary unavailability - Rate limiting (HTTP 429 responses) - Timeout errors - DNS resolution failures
Implementing retry logic helps your applications handle these transient failures gracefully and improves overall reliability.
Basic Retry Implementation with requests
The simplest approach uses a loop with the popular `requests` library:
```python
import requests
import time
from requests.exceptions import RequestException

def make_request_with_retry(url, max_retries=3, delay=1):
    """
    Make an HTTP request with basic retry logic.
    """
    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Raises HTTPError for bad responses
            return response
        except RequestException as e:
            if attempt == max_retries:
                raise e
            print(f"Attempt {attempt + 1} failed: {e}")
            time.sleep(delay)

# Usage example
try:
    response = make_request_with_retry("https://api.example.com/data")
    print(response.json())
except requests.RequestException as e:
    print(f"All retry attempts failed: {e}")
```
Exponential Backoff Strategy
Exponential backoff increases the delay between retries exponentially, reducing server load and improving success rates:
```python
import requests
import time
import random
from requests.exceptions import RequestException

def exponential_backoff_retry(url, max_retries=5, base_delay=1, max_delay=60):
    """
    Implement exponential backoff with jitter.
    """
    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response
        except RequestException as e:
            if attempt == max_retries:
                raise e
            # Calculate delay with exponential backoff and jitter
            delay = min(base_delay * (2 ** attempt), max_delay)
            jitter = random.uniform(0, 0.1) * delay
            total_delay = delay + jitter
            print(f"Attempt {attempt + 1} failed: {e}")
            print(f"Retrying in {total_delay:.2f} seconds...")
            time.sleep(total_delay)

# Usage
response = exponential_backoff_retry("https://api.example.com/data")
```
Decorator-Based Retry Implementation
Create a reusable decorator for retry functionality:
```python
import functools
import random
import time

import requests
from requests.exceptions import RequestException

def retry_with_backoff(max_retries=3, base_delay=1, backoff_factor=2,
                       exceptions=(RequestException,)):
    """
    Decorator for adding retry logic with exponential backoff.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    if attempt == max_retries:
                        raise e
                    delay = base_delay * (backoff_factor ** attempt)
                    jitter = random.uniform(0, 0.1) * delay
                    print(f"Attempt {attempt + 1} failed, retrying...")
                    time.sleep(delay + jitter)
        return wrapper
    return decorator

# Usage with decorator
@retry_with_backoff(max_retries=5, base_delay=1)
def fetch_data(url):
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()

# Call the decorated function
data = fetch_data("https://api.example.com/users")
```
Advanced Retry with Specific HTTP Status Codes
Handle specific HTTP status codes differently:
```python
import requests
import time
from requests.exceptions import RequestException

def smart_retry_request(url, max_retries=3, delay=1):
    """
    Retry with different strategies based on HTTP status codes.
    """
    retryable_status_codes = {429, 500, 502, 503, 504}

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, timeout=10)

            # Check if the status code is retryable
            if response.status_code in retryable_status_codes:
                if attempt == max_retries:
                    response.raise_for_status()

                # Handle rate limiting (429) by honoring the Retry-After header
                if response.status_code == 429:
                    retry_after = response.headers.get('Retry-After', '')
                    wait_time = int(retry_after) if retry_after.isdigit() else delay * 2
                else:
                    wait_time = delay * (2 ** attempt)

                print(f"HTTP {response.status_code}: Retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue

            response.raise_for_status()
            return response
        except RequestException as e:
            if attempt == max_retries:
                raise e
            print(f"Network error: {e}. Retrying...")
            time.sleep(delay * (2 ** attempt))
```
Using the tenacity Library
The `tenacity` library provides a powerful and flexible retry mechanism:
```bash
pip install tenacity
```
```python
import requests
from requests.exceptions import RequestException
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=1, max=60),
    retry=retry_if_exception_type(RequestException)
)
def robust_request(url):
    """
    Make an HTTP request with tenacity retry logic.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response

# Usage
try:
    response = robust_request("https://api.example.com/data")
    print("Success:", response.json())
except Exception as e:
    print(f"All retries failed: {e}")
```
Custom Retry Conditions with tenacity
```python
import requests
from requests.exceptions import RequestException
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_result

def should_retry(response):
    """Retry when the request failed entirely (None) or the server returned a 5xx error."""
    return response is None or response.status_code >= 500

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    retry=retry_if_result(should_retry)
)
def request_with_custom_retry(url):
    try:
        response = requests.get(url, timeout=10)
        return response
    except RequestException:
        return None  # Returning None triggers a retry via should_retry

# Usage
response = request_with_custom_retry("https://api.example.com/data")
if response and response.status_code == 200:
    print("Success!")
```
Retry Logic for Web Scraping
When scraping websites, you might need more sophisticated retry logic, similar to the timeout handling that browser automation tools provide:
```python
import time

import requests

class WebScrapingSession:
    def __init__(self, max_retries=3, delay=1):
        self.session = requests.Session()
        self.max_retries = max_retries
        self.delay = delay

        # Set common headers
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def get_with_retry(self, url, **kwargs):
        """Get a URL with retry logic and session management."""
        for attempt in range(self.max_retries + 1):
            try:
                response = self.session.get(url, timeout=15, **kwargs)

                # Handle common web scraping issues
                if response.status_code == 403:
                    print("Access forbidden - might need different headers")
                    time.sleep(self.delay * 3)  # Longer delay for 403
                elif response.status_code == 429:
                    print("Rate limited - waiting longer...")
                    time.sleep(self.delay * 5)
                elif response.status_code >= 500:
                    print(f"Server error {response.status_code}")
                    time.sleep(self.delay * 2)
                else:
                    response.raise_for_status()
                    return response
            except requests.exceptions.ConnectionError as e:
                print(f"Connection error: {e}")
                if attempt < self.max_retries:
                    time.sleep(self.delay * (2 ** attempt))
            except requests.exceptions.Timeout as e:
                print(f"Timeout error: {e}")
                if attempt < self.max_retries:
                    time.sleep(self.delay)
            except Exception as e:
                if attempt == self.max_retries:
                    raise e
                time.sleep(self.delay)
        return None

# Usage for web scraping
scraper = WebScrapingSession(max_retries=5, delay=2)
response = scraper.get_with_retry("https://example.com/page")
if response:
    print("Successfully scraped page")
```
Async Retry Implementation
For async applications using `aiohttp`:
```python
import asyncio

import aiohttp
from aiohttp import ClientError

async def async_retry_request(session, url, max_retries=3, delay=1):
    """
    Async retry logic with aiohttp.
    """
    for attempt in range(max_retries + 1):
        try:
            async with session.get(url) as response:
                response.raise_for_status()
                return await response.text()
        except ClientError as e:
            if attempt == max_retries:
                raise e
            wait_time = delay * (2 ** attempt)
            print(f"Attempt {attempt + 1} failed: {e}")
            await asyncio.sleep(wait_time)

# Usage
async def main():
    async with aiohttp.ClientSession() as session:
        content = await async_retry_request(session, "https://api.example.com/data")
        print(content)

# Run the async function
asyncio.run(main())
```
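If you already rely on tenacity elsewhere, note that recent versions of its `@retry` decorator also work on coroutine functions, so the same configuration style carries over to async code. A minimal sketch, with the URL as a placeholder:

```python
import asyncio

import aiohttp
from aiohttp import ClientError
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=30),
    retry=retry_if_exception_type(ClientError),
)
async def fetch_text(url):
    # tenacity detects the coroutine function and performs the waits asynchronously
    async with aiohttp.ClientSession() as session:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            response.raise_for_status()
            return await response.text()

print(asyncio.run(fetch_text("https://api.example.com/data")))
```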
Best Practices
- Use exponential backoff with jitter to avoid thundering herd problems
- Set maximum retry limits to prevent infinite loops
- Log retry attempts for debugging and monitoring
- Handle specific error types differently (network vs. HTTP errors)
- Respect rate limits by checking `Retry-After` headers
- Use circuit breaker patterns for frequently failing endpoints (a minimal sketch follows this list)
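The circuit breaker item deserves a quick illustration. The class below is only a minimal sketch of the idea (names and thresholds are illustrative): after a configurable number of consecutive failures it stops calling the endpoint until a cool-down period has passed.

```python
import time

import requests
from requests.exceptions import RequestException

class CircuitBreaker:
    """Minimal circuit breaker: stop calling an endpoint after repeated failures."""

    def __init__(self, failure_threshold=5, reset_timeout=30):
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.failure_count = 0
        self.opened_at = None  # timestamp the circuit opened, None while closed

    def call(self, url):
        # While the circuit is open, refuse calls until the cool-down has passed
        if self.opened_at is not None:
            if time.time() - self.opened_at < self.reset_timeout:
                raise RuntimeError("Circuit open - skipping request")
            self.opened_at = None  # half-open: allow one trial request

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except RequestException:
            self.failure_count += 1
            if self.failure_count >= self.failure_threshold:
                self.opened_at = time.time()
            raise
        else:
            self.failure_count = 0  # a success closes the circuit again
            return response

# Usage
breaker = CircuitBreaker(failure_threshold=3, reset_timeout=60)
response = breaker.call("https://api.example.com/data")
```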
Testing Retry Logic
```python
import unittest
from unittest.mock import patch, Mock

import requests

# Assumes make_request_with_retry from the basic example above is defined in
# (or imported into) this module.

class TestRetryLogic(unittest.TestCase):
    @patch('requests.get')
    def test_successful_retry(self, mock_get):
        # First call fails, second succeeds
        mock_response = Mock()
        mock_response.raise_for_status.return_value = None
        mock_get.side_effect = [requests.RequestException("Network error"), mock_response]

        result = make_request_with_retry("http://test.com", max_retries=2)

        self.assertEqual(result, mock_response)
        self.assertEqual(mock_get.call_count, 2)

    @patch('requests.get')
    def test_max_retries_exceeded(self, mock_get):
        mock_get.side_effect = requests.RequestException("Persistent error")

        with self.assertRaises(requests.RequestException):
            make_request_with_retry("http://test.com", max_retries=2)

        self.assertEqual(mock_get.call_count, 3)  # Initial attempt + 2 retries
```
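Note that the tests above still call the real `time.sleep` between mocked attempts. To keep the suite fast you can patch the sleep as well; here is a sketch of the first test with that tweak (again assuming `make_request_with_retry` is available in the test module):

```python
import unittest
from unittest.mock import patch, Mock

import requests

class TestRetryLogicFast(unittest.TestCase):
    @patch('time.sleep')        # outer decorator -> last mock argument
    @patch('requests.get')      # inner decorator -> first mock argument
    def test_successful_retry_without_sleeping(self, mock_get, mock_sleep):
        mock_response = Mock()
        mock_response.raise_for_status.return_value = None
        mock_get.side_effect = [requests.RequestException("Network error"), mock_response]

        result = make_request_with_retry("http://test.com", max_retries=2)

        self.assertEqual(result, mock_response)
        mock_sleep.assert_called_once()  # one failed attempt -> one backoff sleep

if __name__ == '__main__':
    unittest.main()
```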
Conclusion
Implementing robust retry logic is essential for building reliable Python applications that interact with external services. Whether you choose a simple loop-based approach or a sophisticated library like `tenacity`, the key is to handle failures gracefully while avoiding overwhelming target servers. Consider your specific use case, the types of errors you expect, and the performance requirements when choosing your retry strategy.
For complex web scraping scenarios, you might also want to explore how browser automation tools handle error conditions to complement your HTTP retry strategies.