How do you handle API errors and exceptions gracefully?
When building web scraping applications that interact with APIs, proper error handling is crucial for creating robust, maintainable, and reliable systems. API errors are inevitable due to network issues, server problems, rate limiting, authentication failures, and various other factors. This comprehensive guide covers best practices for handling API errors and exceptions gracefully across different programming languages and scenarios.
Understanding API Error Types
Before implementing error handling strategies, it's important to understand the different types of errors you might encounter:
HTTP Status Code Errors
- 4xx Client Errors: Bad requests, authentication issues, not found errors
- 5xx Server Errors: Internal server errors, service unavailable, gateway timeouts
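- Network Errors: Connection timeouts, DNS resolution failures, connection refused (these occur before any HTTP status code is received, so they surface as exceptions rather than status codes)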
Application-Level Errors
- Rate Limiting: 429 Too Many Requests
- Authentication: 401 Unauthorized, 403 Forbidden
- Data Validation: 400 Bad Request with validation details
- Resource Limits: 413 Payload Too Large, 414 URI Too Long
Python Error Handling Implementation
Here's a comprehensive Python implementation using the `requests` library with proper error handling:
import requests
import time
import logging
from typing import Optional, Dict, Any
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
class APIClient:
    """JSON API client that converts HTTP and transport failures into typed exceptions.

    Transient failures (429 and common 5xx codes) are retried by the session's
    urllib3 ``Retry`` policy. Whatever response remains after those retries is
    classified by ``_handle_response``.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """Store connection settings and build the retrying session.

        Args:
            base_url: Root URL that endpoints are appended to.
            api_key: Optional token sent as a Bearer Authorization header.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.session = self._create_session()
        self.logger = logging.getLogger(__name__)

    def _create_session(self) -> requests.Session:
        """Create a session with retry strategy"""
        session = requests.Session()
        # Configure retry strategy
        retry_strategy = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            # NOTE(review): POST is not idempotent; retrying it can repeat
            # side effects on the server. Confirm this is acceptable.
            allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE"],
            # Hand back the final response instead of raising RetryError so
            # _handle_response can map it to a specific exception type.
            raise_on_status=False,
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    def _get_headers(self) -> Dict[str, str]:
        """Build the headers sent with every request."""
        headers = {
            "Content-Type": "application/json",
            "User-Agent": "APIClient/1.0",
        }
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_retry_after(value: Optional[str], default: int = 60) -> int:
        """Return the Retry-After delay in whole seconds.

        Falls back to ``default`` when the header is missing or is not a
        plain integer (for example the HTTP-date form of Retry-After).
        """
        try:
            return max(0, int(value))
        except (TypeError, ValueError):
            return default

    def make_request(self, endpoint: str, method: str = "GET",
                     data: Dict = None, params: Dict = None) -> Optional[Dict[Any, Any]]:
        """Make an API request with comprehensive error handling.

        Args:
            endpoint: Path appended to ``base_url``.
            method: HTTP method name.
            data: Optional payload sent as the JSON request body.
            params: Optional query-string parameters.

        Returns:
            The decoded response body for any 2xx response.

        Raises:
            APITimeoutError: The connect or read timeout elapsed.
            APIConnectionError: The connection could not be established.
            APIRequestError: Any other failure reported by ``requests``.
            APIError: A subclass chosen by ``_handle_response`` for non-2xx
                responses.
        """
        # Normalise both sides of the join so a trailing slash on base_url
        # does not produce "//" in the URL.
        url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
        headers = self._get_headers()
        try:
            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                json=data,
                params=params,
                timeout=(10, 30),  # (connect_timeout, read_timeout)
            )
        except requests.exceptions.Timeout as exc:
            self.logger.error("Timeout error for %s", url)
            raise APITimeoutError(f"Request timed out for {url}") from exc
        except requests.exceptions.ConnectionError as exc:
            self.logger.error("Connection error for %s", url)
            raise APIConnectionError(f"Failed to connect to {url}") from exc
        except requests.exceptions.RequestException as exc:
            self.logger.error("Request exception for %s: %s", url, exc)
            raise APIRequestError(f"Request failed: {str(exc)}") from exc
        return self._handle_response(response)

    def _handle_response(self, response: requests.Response) -> Dict[Any, Any]:
        """Return the body of a 2xx response or raise the matching APIError."""
        try:
            # Always try to get JSON response for error details
            response_data = response.json()
        except ValueError:
            response_data = {"message": response.text}

        status = response.status_code
        if 200 <= status < 300:
            return response_data

        if status == 429:
            retry_after = self._parse_retry_after(response.headers.get("Retry-After"))
            # Do not sleep here: the caller decides whether to wait, using
            # the retry_after value carried on the exception.
            self.logger.warning("Rate limited. Retry after %s seconds", retry_after)
            raise APIRateLimitError("Rate limit exceeded", retry_after=retry_after)
        if status == 401:
            raise APIAuthenticationError("Authentication failed")
        if status == 403:
            raise APIAuthorizationError("Access forbidden")
        if status == 404:
            raise APINotFoundError("Resource not found")

        # Error bodies are not guaranteed to be JSON objects.
        detail = response_data.get("message") if isinstance(response_data, dict) else None
        if 400 <= status < 500:
            raise APIClientError(f"Client error: {detail or 'Client error'}", status_code=status)
        if 500 <= status < 600:
            raise APIServerError(f"Server error: {detail or 'Server error'}", status_code=status)
        raise APIError(f"Unexpected status code: {status}")
# Custom exception classes
class APIError(Exception):
    """Base API exception.

    Attributes are set in ``__init__``: ``message`` is the human-readable
    description and ``status_code`` is the HTTP status, or None when the
    failure happened before a response was received.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        self.message = message
        self.status_code = status_code
        super().__init__(self.message)


class APITimeoutError(APIError):
    """API timeout exception"""


class APIConnectionError(APIError):
    """API connection exception"""


class APIRateLimitError(APIError):
    """API rate limiting exception.

    ``retry_after`` is the number of seconds the caller should wait before
    retrying.
    """

    def __init__(self, message: str, retry_after: int = 60, status_code: int = 429):
        self.retry_after = retry_after
        # Forward the status code so it is not silently left as None.
        super().__init__(message, status_code=status_code)


class APIAuthenticationError(APIError):
    """API authentication exception"""


class APIAuthorizationError(APIError):
    """API authorization exception"""


class APINotFoundError(APIError):
    """API not found exception"""


class APIClientError(APIError):
    """API client error (4xx)"""


class APIServerError(APIError):
    """API server error (5xx)"""


class APIRequestError(APIError):
    """General API request exception"""
JavaScript Error Handling Implementation
Here's a robust JavaScript implementation using modern async/await syntax with comprehensive error handling:
/**
 * Fetch-based JSON API client with retries and typed errors.
 */
class APIClient {
  /**
   * @param {string} baseUrl Base URL that endpoints are resolved against.
   * @param {?string} apiKey Optional token sent as a Bearer Authorization header.
   */
  constructor(baseUrl, apiKey = null) {
    this.baseUrl = baseUrl;
    this.apiKey = apiKey;
    this.defaultTimeout = 30000; // 30 seconds
  }

  /**
   * Send a request and resolve with the parsed response body.
   *
   * @param {string} endpoint Path or URL resolved against baseUrl.
   * @param {object} [options] method, data, params, timeout (ms), retries, retryDelay (ms).
   * @returns {Promise<object>} Parsed body of a successful response.
   * @throws {APIError} A subclass describing the failure.
   */
  async makeRequest(endpoint, options = {}) {
    const {
      method = 'GET',
      data = null,
      params = {},
      timeout = this.defaultTimeout,
      retries = 3,
      retryDelay = 1000
    } = options;

    const url = new URL(endpoint, this.baseUrl);
    // Add query parameters
    Object.entries(params).forEach(([key, value]) => {
      url.searchParams.append(key, value);
    });

    // The timeout signal is created per attempt in _requestWithRetry.
    const requestOptions = {
      method,
      headers: this._getHeaders()
    };
    if (data && ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())) {
      requestOptions.body = JSON.stringify(data);
    }

    return this._requestWithRetry(url.toString(), requestOptions, retries, retryDelay, timeout);
  }

  /**
   * Run fetch up to retries + 1 times, backing off between retryable failures.
   *
   * @param {string} url Fully resolved request URL.
   * @param {object} options fetch options without a timeout signal.
   * @param {number} retries Maximum number of retries after the first attempt.
   * @param {number} retryDelay Base delay in ms, doubled on each attempt.
   * @param {number} [timeout] Per-attempt timeout in ms.
   */
  async _requestWithRetry(url, options, retries, retryDelay, timeout = this.defaultTimeout) {
    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        // A fresh signal per attempt: a shared one would already be aborted
        // after the first timeout and would fail every retry immediately.
        const response = await fetch(url, {
          ...options,
          signal: AbortSignal.timeout(timeout)
        });
        return await this._handleResponse(response);
      } catch (rawError) {
        // Normalise first so _shouldRetry sees APIError subclasses rather
        // than the raw TypeError/DOMException thrown by fetch.
        const error = this._handleError(rawError);
        if (attempt === retries || !this._shouldRetry(error)) {
          throw error;
        }

        const backoff = retryDelay * Math.pow(2, attempt); // Exponential backoff
        // Honour the server's Retry-After hint when it is longer than the backoff.
        const delay = error instanceof APIRateLimitError
          ? Math.max(backoff, error.retryAfter * 1000)
          : backoff;
        console.warn(`Request failed, retrying in ${delay}ms (attempt ${attempt + 1}/${retries})`);
        await this._sleep(delay);
      }
    }
  }

  /**
   * Parse the response body and return it, or throw the matching APIError.
   */
  async _handleResponse(response) {
    let responseData;
    const contentType = response.headers.get('content-type');
    try {
      if (contentType && contentType.includes('application/json')) {
        responseData = await response.json();
      } else {
        responseData = { message: await response.text() };
      }
    } catch (e) {
      responseData = { message: 'Failed to parse response' };
    }

    if (response.ok) {
      return responseData;
    }

    // Handle specific status codes
    switch (response.status) {
      case 429: {
        // Retry-After may be an HTTP date; fall back to 60 s when it is not a number.
        const parsed = parseInt(response.headers.get('Retry-After') || '60', 10);
        const retryAfter = Number.isNaN(parsed) ? 60 : parsed;
        throw new APIRateLimitError('Rate limit exceeded', retryAfter);
      }
      case 401:
        throw new APIAuthenticationError('Authentication failed');
      case 403:
        throw new APIAuthorizationError('Access forbidden');
      case 404:
        throw new APINotFoundError('Resource not found');
      default: {
        const detail = responseData && responseData.message;
        if (response.status >= 400 && response.status < 500) {
          throw new APIClientError(detail || 'Client error', response.status);
        } else if (response.status >= 500) {
          throw new APIServerError(detail || 'Server error', response.status);
        } else {
          throw new APIError(`Unexpected status code: ${response.status}`);
        }
      }
    }
  }

  /**
   * Convert anything thrown during a request into an APIError instance.
   */
  _handleError(error) {
    // Return the error as-is if it's already an API error
    if (error instanceof APIError) {
      return error;
    }
    // AbortSignal.timeout() rejects with "TimeoutError"; a manual abort
    // rejects with "AbortError".
    if (error && (error.name === 'TimeoutError' || error.name === 'AbortError')) {
      return new APITimeoutError('Request timed out');
    }
    // fetch reports network failures as TypeError, and the message text
    // differs between engines, so match on the type instead of the message.
    if (error instanceof TypeError) {
      return new APIConnectionError('Network connection failed');
    }
    return new APIRequestError(`Request failed: ${error && error.message}`);
  }

  /**
   * Decide whether a normalised error is worth another attempt.
   */
  _shouldRetry(error) {
    // Retry on network errors, timeouts, and certain status codes
    return error instanceof APITimeoutError ||
      error instanceof APIConnectionError ||
      (error instanceof APIServerError && error.statusCode >= 500) ||
      (error instanceof APIRateLimitError);
  }

  /**
   * Build the headers sent with every request.
   */
  _getHeaders() {
    const headers = {
      'Content-Type': 'application/json',
      'User-Agent': 'APIClient/1.0'
    };
    if (this.apiKey) {
      headers['Authorization'] = `Bearer ${this.apiKey}`;
    }
    return headers;
  }

  /**
   * Resolve after ms milliseconds.
   */
  _sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
// Custom error classes

/**
 * Base class for every error thrown by the API client.
 * statusCode is the HTTP status, or null when no response was received.
 */
class APIError extends Error {
  constructor(message, statusCode = null) {
    super(message);
    // Use the concrete subclass name so logs show e.g. "APITimeoutError".
    this.name = this.constructor.name;
    this.statusCode = statusCode;
  }
}

class APITimeoutError extends APIError {}
class APIConnectionError extends APIError {}
class APIRequestError extends APIError {}
class APIAuthenticationError extends APIError {}
class APIAuthorizationError extends APIError {}
class APINotFoundError extends APIError {}
class APIClientError extends APIError {}
class APIServerError extends APIError {}

/**
 * Rate-limit error; retryAfter is the number of seconds to wait before retrying.
 */
class APIRateLimitError extends APIError {
  constructor(message, retryAfter = 60, statusCode = 429) {
    // Forward the status code so it is not left as null.
    super(message, statusCode);
    this.retryAfter = retryAfter;
  }
}
Advanced Error Handling Strategies
Exponential Backoff with Jitter
Implementing exponential backoff with jitter helps prevent thundering herd problems when multiple clients retry simultaneously:
import random
def exponential_backoff_with_jitter(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
    """Calculate delay with exponential backoff and jitter.

    Args:
        attempt: Zero-based retry attempt number.
        base_delay: Delay in seconds for attempt 0.
        max_delay: Upper bound applied before jitter is added.

    Returns:
        The capped delay plus 0-10% random jitter, in seconds.
    """
    try:
        raw_delay = base_delay * (2 ** attempt)
    except OverflowError:
        # 2 ** attempt no longer fits in a float; a positive base delay is
        # far beyond the cap by then, so saturate instead of crashing.
        raw_delay = max_delay if base_delay > 0 else 0.0
    delay = min(raw_delay, max_delay)
    jitter = delay * 0.1 * random.random()  # Add 0-10% jitter
    return delay + jitter
Circuit Breaker Pattern
Implement a circuit breaker to prevent cascading failures:
from enum import Enum
from datetime import datetime, timedelta
class CircuitBreakerState(Enum):
    """States of the circuit breaker."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"


class CircuitBreakerOpenError(Exception):
    """Raised when a call is rejected because the circuit breaker is open."""


class CircuitBreaker:
    """Stop calling a failing function until a cool-down period has passed.

    The breaker opens once ``failure_threshold`` failures have been counted
    without an intervening success. While open, calls are rejected with
    ``CircuitBreakerOpenError``. After ``timeout`` seconds one trial call is
    let through in the half-open state.
    """

    def __init__(self, failure_threshold: int = 5, timeout: int = 60):
        """
        Args:
            failure_threshold: Failure count at which the breaker opens.
            timeout: Seconds to stay open before allowing a trial call.
        """
        self.failure_threshold = failure_threshold
        self.timeout = timedelta(seconds=timeout)
        self.failure_count = 0
        self.last_failure_time = None
        self.state = CircuitBreakerState.CLOSED

    def call(self, func, *args, **kwargs):
        """Invoke ``func(*args, **kwargs)`` through the breaker.

        Returns:
            Whatever ``func`` returns.

        Raises:
            CircuitBreakerOpenError: The breaker is open and the cool-down
                has not elapsed.
            Exception: Anything raised by ``func`` is re-raised unchanged
                after being counted as a failure.
        """
        if self.state == CircuitBreakerState.OPEN:
            if datetime.now() - self.last_failure_time > self.timeout:
                self.state = CircuitBreakerState.HALF_OPEN
            else:
                raise CircuitBreakerOpenError("Circuit breaker is open")

        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            # Bare raise keeps the original traceback intact.
            raise
        self._on_success()
        return result

    def _on_success(self):
        """Reset the failure count and close the breaker."""
        self.failure_count = 0
        self.state = CircuitBreakerState.CLOSED

    def _on_failure(self):
        """Record a failure and open the breaker once the threshold is reached."""
        self.failure_count += 1
        self.last_failure_time = datetime.now()
        if self.failure_count >= self.failure_threshold:
            self.state = CircuitBreakerState.OPEN
Monitoring and Logging
Proper monitoring and logging are essential for debugging API issues:
import logging
import structlog
# Configure structured logging
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.JSONRenderer()
    ],
    # wrapper_class is the bound-logger class that wraps the logger built by
    # logger_factory; it must not be a LoggerFactory instance.
    wrapper_class=structlog.stdlib.BoundLogger,
    logger_factory=structlog.stdlib.LoggerFactory(),
    context_class=dict,
    cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
def log_api_request(url: str, method: str, status_code: Optional[int] = None,
                    response_time: Optional[float] = None, error: Optional[str] = None):
    """Log API request details.

    Emits an error-level event when ``error`` is given, otherwise an
    info-level event.

    Args:
        url: Requested URL.
        method: HTTP method used.
        status_code: Response status; omitted from the event when None.
        response_time: Elapsed time in seconds; logged in milliseconds.
        error: Error description, if the request failed.
    """
    log_data = {
        "url": url,
        "method": method,
        # Compare against None so a measured 0.0 s is logged as 0, not dropped.
        "response_time_ms": response_time * 1000 if response_time is not None else None,
    }
    if status_code is not None:
        log_data["status_code"] = status_code
    if error:
        log_data["error"] = error
        logger.error("API request failed", **log_data)
    else:
        logger.info("API request completed", **log_data)
Best Practices for API Error Handling
1. Use Specific Exception Types
Create specific exception classes for different error scenarios to enable targeted error handling.
2. Implement Proper Retry Logic
Use exponential backoff with jitter and respect server-provided `Retry-After` headers.
3. Handle Rate Limiting Gracefully
When dealing with API rate limiting scenarios, implement proper backoff strategies and queue management.
4. Log Comprehensive Error Information
Include request details, response codes, and timing information in your logs for effective debugging.
5. Set Appropriate Timeouts
Configure both connection and read timeouts to prevent hanging requests.
6. Use Connection Pooling
Reuse connections when possible to improve performance and reduce resource usage.
7. Monitor Error Rates
Track error rates and patterns to identify systemic issues before they become critical.
Testing Error Handling
Testing your error handling code is crucial. Here's an example using Python's `unittest.mock`:
import unittest
from unittest.mock import patch, Mock
import requests
class TestAPIClient(unittest.TestCase):
    """Check that APIClient maps failures to the expected exception types."""

    def setUp(self):
        self.client = APIClient("https://api.example.com", "test-key")

    @patch('requests.Session.request')
    def test_timeout_error(self, mock_request):
        """A requests Timeout surfaces as APITimeoutError."""
        mock_request.side_effect = requests.exceptions.Timeout()

        with self.assertRaises(APITimeoutError):
            self.client.make_request("/test")

    # Patch time.sleep so the test cannot block for the Retry-After duration
    # if the client waits before raising.
    @patch('time.sleep', return_value=None)
    @patch('requests.Session.request')
    def test_rate_limit_handling(self, mock_request, mock_sleep):
        """A 429 response raises APIRateLimitError carrying Retry-After."""
        mock_response = Mock()
        mock_response.status_code = 429
        mock_response.headers = {'Retry-After': '30'}
        mock_response.json.return_value = {'message': 'Rate limited'}
        mock_request.return_value = mock_response

        with self.assertRaises(APIRateLimitError) as context:
            self.client.make_request("/test")

        self.assertEqual(context.exception.retry_after, 30)
Conclusion
Graceful API error handling is fundamental to building reliable web scraping applications. By implementing comprehensive error handling strategies, including proper exception hierarchies, retry logic with exponential backoff, circuit breakers, and thorough logging, you can create robust systems that handle failures gracefully and provide meaningful feedback for debugging and monitoring.
Remember that different APIs may have unique error patterns and requirements, so always consult the API documentation and test your error handling thoroughly. When implementing retry logic for failed API requests, consider the specific characteristics of your target APIs and adjust your strategies accordingly.
The key is to anticipate failures, handle them gracefully, and provide clear information about what went wrong and how the system responded. This approach will make your web scraping applications more reliable and maintainable in production environments.