Proper exception handling is crucial when using the Python Requests library for web scraping or HTTP requests. The Requests library raises specific exceptions for different types of network and HTTP errors, allowing you to handle failures gracefully and build robust applications.
Common Requests Exceptions
Core Exception Types
The Requests library defines several exception types, all inheriting from requests.exceptions.RequestException:

- RequestException: Base exception class for all Requests-related errors
- HTTPError: Raised for HTTP 4XX/5XX status codes (when using raise_for_status())
- ConnectionError: Network connection problems (DNS failures, refused connections)
- Timeout: Request exceeds the specified timeout duration
- TooManyRedirects: Request exceeds the maximum redirect limit
- URLRequired: No valid URL was provided
- InvalidURL: A malformed URL was provided
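Because every class in this list derives from RequestException, a final except RequestException clause works as a catch-all for anything the library raises. A quick sanity check of the hierarchy:

import requests.exceptions as exc

# Every Requests exception is a subclass of RequestException,
# so one `except RequestException` clause catches all of them.
for name in ("HTTPError", "ConnectionError", "Timeout",
             "TooManyRedirects", "URLRequired", "InvalidURL"):
    print(name, issubclass(getattr(exc, name), exc.RequestException))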
Basic Exception Handling
Simple Exception Handling
import requests
from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException

url = "https://httpbin.org/status/404"

try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # Raises HTTPError for 4XX/5XX status codes
    print("Success:", response.status_code)
    print(response.text)
except HTTPError as e:
    print(f"HTTP error: {e}")
    print(f"Status code: {e.response.status_code}")
except ConnectionError as e:
    print(f"Connection error: {e}")
except Timeout as e:
    print(f"Timeout error: {e}")
except RequestException as e:
    print(f"Request error: {e}")
Comprehensive Error Handling
import time

import requests
from requests.exceptions import (
    HTTPError, ConnectionError, Timeout,
    TooManyRedirects, RequestException
)

def safe_request(url, max_retries=3):
    for attempt in range(max_retries):
        try:
            response = requests.get(
                url,
                timeout=10,
                headers={'User-Agent': 'My Scraper 1.0'}
            )
            response.raise_for_status()
            return response
        except HTTPError as e:
            status_code = e.response.status_code
            if status_code == 404:
                print(f"Page not found: {url}")
                return None
            elif status_code >= 500:
                print(f"Server error {status_code}, retrying...")
                continue
            else:
                print(f"Client error {status_code}: {e}")
                return None
        except ConnectionError as e:
            print(f"Connection failed (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                return None
            time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...
        except Timeout as e:
            print(f"Request timed out (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                return None
        except TooManyRedirects as e:
            print(f"Too many redirects: {e}")
            return None
        except RequestException as e:
            print(f"Unexpected error: {e}")
            return None
    return None

# Usage
response = safe_request("https://httpbin.org/delay/2")
if response:
    print("Request successful!")
    print(response.json())
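A hand-rolled loop like safe_request gives you full control, but Requests can also delegate retries to urllib3 by mounting an HTTPAdapter on a Session. A minimal sketch, assuming a reasonably recent urllib3 (Retry's parameter names have shifted across versions):

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
retries = Retry(
    total=3,                               # up to 3 retries per request
    backoff_factor=1,                      # sleep ~1s, 2s, 4s between tries
    status_forcelist=[500, 502, 503, 504]  # retry these server errors
)
session.mount("https://", HTTPAdapter(max_retries=retries))
session.mount("http://", HTTPAdapter(max_retries=retries))

response = session.get("https://httpbin.org/status/503", timeout=10)

When the retries are exhausted, the call raises requests.exceptions.RetryError, which still derives from RequestException, so the handlers shown earlier continue to apply.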
Advanced Exception Handling Patterns
Status Code-Specific Handling
import requests
from requests.exceptions import HTTPError

def handle_http_errors(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()
    except HTTPError as e:
        status_code = e.response.status_code
        if status_code == 401:
            print("Authentication required")
            # Handle authentication
        elif status_code == 403:
            print("Access forbidden")
            # Handle permission issues
        elif status_code == 429:
            print("Rate limited")
            # Handle rate limiting
        elif 500 <= status_code < 600:
            print(f"Server error: {status_code}")
            # Handle server errors
        else:
            print(f"HTTP error: {status_code}")
        return None
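For 429 responses in particular, many servers send a Retry-After header indicating how long to back off. A minimal sketch of honoring it; the 60-second cap is arbitrary, and the sketch assumes the header holds seconds (it may also be an HTTP date, which is not handled here):

import time
import requests
from requests.exceptions import HTTPError

def get_with_rate_limit(url, max_attempts=3):
    for _ in range(max_attempts):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response
        except HTTPError as e:
            if e.response.status_code != 429:
                raise  # Not rate limiting; let the caller handle it
            wait = int(e.response.headers.get("Retry-After", "1"))
            time.sleep(min(wait, 60))  # Cap the wait at 60 seconds
    return None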
Context Manager for Error Handling
import requests
from contextlib import contextmanager

@contextmanager
def safe_session():
    session = requests.Session()
    try:
        yield session
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    finally:
        session.close()

# Usage
with safe_session() as session:
    response = session.get("https://httpbin.org/json")
    if response:  # A Response is falsy for 4XX/5XX status codes
        print(response.json())
Custom Exception Handler
import requests
import logging
from functools import wraps

def handle_requests_exceptions(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except requests.exceptions.HTTPError as e:
            logging.error(f"HTTP error in {func.__name__}: {e}")
            return None
        except requests.exceptions.ConnectionError as e:
            logging.error(f"Connection error in {func.__name__}: {e}")
            return None
        except requests.exceptions.Timeout as e:
            logging.error(f"Timeout error in {func.__name__}: {e}")
            return None
        except requests.exceptions.RequestException as e:
            logging.error(f"Request error in {func.__name__}: {e}")
            return None
    return wrapper

@handle_requests_exceptions
def fetch_data(url):
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()

# Usage
data = fetch_data("https://api.example.com/data")
if data:
    print("Data retrieved successfully")
Best Practices
- Always use timeouts to prevent hanging requests
- Implement retry logic for transient failures
- Use exponential backoff for retries
- Handle specific status codes appropriately
- Log errors for debugging and monitoring
- Validate URLs before making requests (see the sketch after this list)
- Use raise_for_status() to convert bad status codes to exceptions
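A lightweight way to validate URLs up front, as suggested above, is to check the parsed scheme and host with the standard library's urlparse; this sketch deliberately accepts only http(s) URLs:

from urllib.parse import urlparse

import requests

def is_valid_url(url):
    # Minimal check: require an http(s) scheme and a network location.
    parts = urlparse(url)
    return parts.scheme in ("http", "https") and bool(parts.netloc)

url = "https://httpbin.org/get"
if is_valid_url(url):
    response = requests.get(url, timeout=10)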
Remember that proper exception handling makes your web scraping applications more reliable and helps you handle various network conditions and server responses gracefully.