How do I handle HTTP exceptions using the Requests library?

Proper exception handling is crucial when using the Python Requests library, whether for web scraping or general HTTP work. Requests raises specific exceptions for different kinds of network and HTTP errors, allowing you to handle failures gracefully and build robust applications.

Common Requests Exceptions

Core Exception Types

The Requests library defines several exception types, all inheriting from requests.exceptions.RequestException:

  • RequestException: Base exception class for all Requests-related errors
  • HTTPError: Raised for HTTP 4XX/5XX status codes (when using raise_for_status())
  • ConnectionError: Network connection problems (DNS failures, refused connections)
  • Timeout: Request exceeds specified timeout duration
  • TooManyRedirects: Exceeds maximum redirect limit
  • URLRequired: Valid URL not provided
  • InvalidURL: Malformed URL provided
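
Because every exception listed above inherits from RequestException, a single except clause on the base class is enough to catch any of them. A minimal sketch to confirm the hierarchy (the throwaway URL is just for illustration):

import requests

# Every specific exception is a subclass of the base RequestException
for exc in (requests.exceptions.HTTPError,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.TooManyRedirects,
            requests.exceptions.InvalidURL):
    print(exc.__name__, issubclass(exc, requests.exceptions.RequestException))  # all True

# So a catch-all handler only needs the base class:
try:
    requests.get("https://nonexistent.invalid", timeout=5)
except requests.exceptions.RequestException as e:
    print(f"Caught via base class: {type(e).__name__}: {e}")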

Basic Exception Handling

Simple Exception Handling

import requests
from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException

url = "https://httpbin.org/status/404"

try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # Raises HTTPError for bad status codes
    print("Success:", response.status_code)
    print(response.text)
except HTTPError as e:
    print(f"HTTP error: {e}")
    print(f"Status code: {e.response.status_code}")
except ConnectionError as e:
    print(f"Connection error: {e}")
except Timeout as e:
    print(f"Timeout error: {e}")
except RequestException as e:
    print(f"Request error: {e}")

Comprehensive Error Handling

import time
import requests
from requests.exceptions import (
    HTTPError, ConnectionError, Timeout,
    TooManyRedirects, RequestException
)

def safe_request(url, max_retries=3):
    for attempt in range(max_retries):
        try:
            response = requests.get(
                url, 
                timeout=10,
                headers={'User-Agent': 'My Scraper 1.0'}
            )
            response.raise_for_status()
            return response

        except HTTPError as e:
            status_code = e.response.status_code
            if status_code == 404:
                print(f"Page not found: {url}")
                return None
            elif status_code >= 500:
                print(f"Server error {status_code}, retrying...")
                continue
            else:
                print(f"Client error {status_code}: {e}")
                return None

        except ConnectionError as e:
            print(f"Connection failed (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                return None
            time.sleep(2 ** attempt)  # Exponential backoff

        except Timeout as e:
            print(f"Request timed out (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                return None
            time.sleep(2 ** attempt)  # Exponential backoff before retrying

        except TooManyRedirects as e:
            print(f"Too many redirects: {e}")
            return None

        except RequestException as e:
            print(f"Unexpected error: {e}")
            return None

    return None

# Usage
response = safe_request("https://httpbin.org/delay/2")
if response:
    print("Request successful!")
    print(response.json())

Advanced Exception Handling Patterns

Status Code-Specific Handling

import requests
from requests.exceptions import HTTPError

def handle_http_errors(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()

    except HTTPError as e:
        status_code = e.response.status_code

        if status_code == 401:
            print("Authentication required")
            # Handle authentication
        elif status_code == 403:
            print("Access forbidden")
            # Handle permission issues
        elif status_code == 429:
            print("Rate limited")
            # Handle rate limiting
        elif 500 <= status_code < 600:
            print(f"Server error: {status_code}")
            # Handle server errors
        else:
            print(f"HTTP error: {status_code}")

        return None
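
The placeholder comments above are where application-specific logic goes. For 429 responses, one common approach is to honor the Retry-After header before retrying; the sketch below illustrates this with a single retry. The helper name fetch_with_rate_limit and the fallback delay are assumptions for illustration, not part of the Requests API:

import time
import requests
from requests.exceptions import HTTPError

def fetch_with_rate_limit(url, max_wait=60):
    # Hypothetical helper: retry once after a 429, honoring Retry-After
    for attempt in range(2):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response
        except HTTPError as e:
            if e.response.status_code == 429 and attempt == 0:
                # Retry-After may be missing or a date string; fall back to 5 seconds
                try:
                    delay = int(e.response.headers.get("Retry-After", 5))
                except ValueError:
                    delay = 5
                time.sleep(min(delay, max_wait))
                continue
            raise
    return None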

Context Manager for Error Handling

import requests
from contextlib import contextmanager

@contextmanager
def safe_session():
    session = requests.Session()
    try:
        yield session
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    finally:
        session.close()

# Usage
with safe_session() as session:
    response = session.get("https://httpbin.org/json")
    if response:  # a Response is falsy for 4xx/5xx status codes
        print(response.json())

Custom Exception Handler

import requests
import logging
from functools import wraps

def handle_requests_exceptions(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except requests.exceptions.HTTPError as e:
            logging.error(f"HTTP error in {func.__name__}: {e}")
            return None
        except requests.exceptions.ConnectionError as e:
            logging.error(f"Connection error in {func.__name__}: {e}")
            return None
        except requests.exceptions.Timeout as e:
            logging.error(f"Timeout error in {func.__name__}: {e}")
            return None
        except requests.exceptions.RequestException as e:
            logging.error(f"Request error in {func.__name__}: {e}")
            return None
    return wrapper

@handle_requests_exceptions
def fetch_data(url):
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()

# Usage
data = fetch_data("https://api.example.com/data")
if data:
    print("Data retrieved successfully")

Best Practices

  1. Always use timeouts to prevent hanging requests
  2. Implement retry logic for transient failures
  3. Use exponential backoff for retries
  4. Handle specific status codes appropriately
  5. Log errors for debugging and monitoring
  6. Validate URLs before making requests
  7. Use raise_for_status() to convert bad status codes to exceptions
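
Instead of hand-rolling a retry loop, retries with exponential backoff (items 2 and 3 above) can also be delegated to urllib3's Retry class mounted on a Session through HTTPAdapter. A minimal sketch, where the retry count, backoff factor, and status list are illustrative choices rather than required values:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
retries = Retry(
    total=3,                                      # up to 3 retries per request
    backoff_factor=0.5,                           # exponential backoff between attempts
    status_forcelist=[429, 500, 502, 503, 504],   # also retry on these status codes
)
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
session.mount("http://", adapter)

try:
    response = session.get("https://httpbin.org/status/503", timeout=10)
    response.raise_for_status()
except requests.exceptions.RetryError as e:
    print(f"Gave up after retries: {e}")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {e}")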

Remember that proper exception handling makes your web scraping applications more reliable and helps you handle various network conditions and server responses gracefully.
