When web scraping requires authentication, the Python `requests` library provides multiple ways to handle credentials. This guide covers all major authentication methods with practical examples.
Basic Authentication
HTTP Basic Authentication sends the username and password base64-encoded. Base64 is an encoding, not encryption, so use Basic auth only over HTTPS. Pass the credentials with the `auth` parameter:
import requests
from requests.auth import HTTPBasicAuth
# Both variants below target the same endpoint.
basic_auth_url = 'https://httpbin.org/basic-auth/user/pass'

# Variant 1: pass an explicit HTTPBasicAuth object.
explicit_auth = HTTPBasicAuth('user', 'pass')
response = requests.get(basic_auth_url, auth=explicit_auth)

# Variant 2: a (username, password) tuple is the shorthand form.
response = requests.get(basic_auth_url, auth=('user', 'pass'))

# Only the most recent response is inspected; report failure first.
if response.status_code != 200:
    print(f"Authentication failed: {response.status_code}")
else:
    print("Authentication successful")
    print(response.json())
Digest Authentication
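Digest authentication is more secure than Basic auth because the password itself is never sent; the client proves it knows the password by returning a hash computed from a server-supplied challenge: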
import requests
from requests.auth import HTTPDigestAuth
# Build the Digest credentials first, then attach them to the request.
digest_credentials = HTTPDigestAuth('user', 'pass')
response = requests.get(
    'https://httpbin.org/digest-auth/auth/user/pass',
    auth=digest_credentials,
)

# Show the outcome: status code first, then the decoded JSON body.
print(response.status_code)
print(response.json())
Bearer Token Authentication
Modern APIs often use Bearer tokens (JWT, OAuth tokens):
import requests
import os
# Read the token from the environment so it never appears in source code.
access_token = os.getenv('API_ACCESS_TOKEN')
if not access_token:
    # Without this guard the header below would literally read "Bearer None".
    raise ValueError("API_ACCESS_TOKEN environment variable required")

headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json',
}
response = requests.get(
    'https://api.github.com/user',
    headers=headers,
    timeout=30,  # never wait indefinitely on an unresponsive server
)
if response.status_code == 200:
    user_data = response.json()
    print(f"Hello, {user_data['name']}")
else:
    # Surface the failure instead of exiting silently.
    print(f"Authentication failed: {response.status_code}")
API Key Authentication
Many services use API keys in headers or query parameters:
import requests
import os
api_key = os.getenv('API_KEY')
if not api_key:
    # Fail fast: without a key both requests below would go out
    # unauthenticated and fail in a less obvious way.
    raise ValueError("API_KEY environment variable required")

# Method 1: API key in headers
headers = {
    'X-API-Key': api_key,
    'User-Agent': 'MyApp/1.0',
}
response = requests.get(
    'https://api.example.com/data',
    headers=headers,
    timeout=30,  # never wait indefinitely on an unresponsive server
)

# Method 2: API key in query parameters
# NOTE: the key becomes part of the URL here, and URLs are commonly written
# to server and proxy logs; prefer the header form when the service allows it.
params = {
    'api_key': api_key,
    'format': 'json',
}
response = requests.get(
    'https://api.example.com/data',
    params=params,
    timeout=30,
)
Custom Authentication
Create custom authentication handlers for proprietary schemes:
import requests
from requests.auth import AuthBase
import hmac
import hashlib
import time
class APISignatureAuth(AuthBase):
    """Sign each outgoing request with an HMAC-SHA256 signature.

    The signed message is the request method, the request URL and the
    current Unix timestamp (whole seconds) concatenated without separators.
    The API key, timestamp and hex signature are sent in the ``X-API-Key``,
    ``X-Timestamp`` and ``X-Signature`` headers.
    """

    def __init__(self, api_key, secret_key):
        """Store the API key and the secret used as the HMAC key."""
        self.api_key, self.secret_key = api_key, secret_key

    def __call__(self, r):
        """Add the three signature headers to request *r* and return it."""
        issued_at = f"{int(time.time())}"
        payload = "{}{}{}".format(r.method, r.url, issued_at)
        mac = hmac.new(
            key=self.secret_key.encode(),
            msg=payload.encode(),
            digestmod=hashlib.sha256,
        )
        r.headers.update({
            'X-API-Key': self.api_key,
            'X-Timestamp': issued_at,
            'X-Signature': mac.hexdigest(),
        })
        return r
# Usage: build the signer once, then hand it to requests through ``auth``.
placeholder_credentials = ('your_api_key', 'your_secret_key')
auth = APISignatureAuth(*placeholder_credentials)
secure_url = 'https://api.example.com/secure'
response = requests.get(secure_url, auth=auth)
Session-Based Authentication
Use sessions to persist authentication across multiple requests:
import requests
# Create a session for automatic cookie handling. The context manager closes
# the session when the block exits, even if one of the requests raises.
with requests.Session() as session:
    # Login request
    login_data = {
        'username': 'your_username',
        'password': 'your_password',
    }
    login_response = session.post(
        'https://example.com/login',
        data=login_data,
        timeout=30,  # never wait indefinitely on an unresponsive server
    )

    if login_response.status_code == 200:
        print("Login successful")

        # Session cookies are automatically maintained
        protected_response = session.get(
            'https://example.com/protected',
            timeout=30,
        )
        print(protected_response.text)

        # Logout when done (only meaningful after a successful login)
        session.post('https://example.com/logout', timeout=30)
    else:
        # Surface the failure instead of continuing silently.
        print(f"Login failed: {login_response.status_code}")
OAuth 2.0 Flow
Complete OAuth 2.0 authentication example:
import requests
import os
def get_oauth_token():
    """Fetch an OAuth 2.0 access token using the client-credentials grant.

    Reads ``OAUTH_CLIENT_ID`` and ``OAUTH_CLIENT_SECRET`` from the
    environment and posts them as form data to the token endpoint.

    Returns:
        The ``access_token`` value from the endpoint's JSON response.

    Raises:
        ValueError: If either environment variable is unset or empty.
        RuntimeError: If the endpoint answers with a status other than 200.
        KeyError: If the JSON response has no ``access_token`` field.
    """
    client_id = os.getenv('OAUTH_CLIENT_ID')
    client_secret = os.getenv('OAUTH_CLIENT_SECRET')

    # Validate before touching the network so a configuration mistake is
    # reported as such rather than as an opaque HTTP error.
    required = (
        ('OAUTH_CLIENT_ID', client_id),
        ('OAUTH_CLIENT_SECRET', client_secret),
    )
    missing = [name for name, value in required if not value]
    if missing:
        raise ValueError(
            f"Missing required environment variables: {', '.join(missing)}"
        )

    token_url = 'https://oauth.example.com/token'
    data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
        'scope': 'read write',
    }
    # Bound the wait so an unresponsive token endpoint cannot hang the caller.
    response = requests.post(token_url, data=data, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to get token: {response.status_code}")
    return response.json()['access_token']
# Obtain a token, then call the API with it as a Bearer credential.
try:
    access_token = get_oauth_token()
    headers = dict(
        Authorization=f'Bearer {access_token}',
        Accept='application/json',
    )
    response = requests.get('https://api.example.com/data', headers=headers)
    payload = response.json()
    print(payload)
except Exception as exc:
    # Any failure in the steps above is reported the same way.
    print(f"Authentication error: {exc}")
Error Handling and Security
Always implement proper error handling and security practices:
import requests
from requests.exceptions import RequestException
import os
import logging
logging.basicConfig(level=logging.INFO)


def secure_api_request(url, auth_token):
    """Send a Bearer-authenticated GET to *url* and return the decoded JSON.

    Returns ``None`` after logging an error when the server answers 401 or
    403, when the response carries any other HTTP error status, or when the
    request raises a ``RequestException``.
    """
    # Statuses that get a dedicated log message instead of raise_for_status().
    auth_failures = {
        401: "Authentication failed - invalid token",
        403: "Access forbidden - insufficient permissions",
    }
    request_headers = {
        'Authorization': f'Bearer {auth_token}',
        'User-Agent': 'MyApp/1.0',
        'Accept': 'application/json',
    }
    try:
        response = requests.get(
            url,
            headers=request_headers,
            timeout=30,   # bound the wait on the request
            verify=True,  # keep TLS certificate verification on
        )
        failure = auth_failures.get(response.status_code)
        if failure is not None:
            logging.error(failure)
            return None
        # Any remaining HTTP error status becomes an exception here.
        response.raise_for_status()
        return response.json()
    except RequestException as exc:
        logging.error(f"Request failed: {exc}")
        return None
# Usage with environment variables
api_token = os.getenv('API_TOKEN')
if not api_token:
    raise ValueError("API_TOKEN environment variable required")

data = secure_api_request('https://api.example.com/data', api_token)
# secure_api_request signals failure with None. Compare against None
# explicitly so a valid but empty JSON body ({} or []) still counts as success.
if data is not None:
    print("Success:", data)
Security Best Practices
- Never hardcode credentials - Use environment variables or secure vaults
- Use HTTPS - Always verify SSL certificates
- Set timeouts - Prevent hanging requests
- Handle errors gracefully - Check status codes and catch exceptions
- Rotate tokens - Refresh access tokens before expiration
- Log securely - Never log sensitive authentication data
# Environment variables example
import os
from dotenv import load_dotenv
load_dotenv()  # Load from .env file

# Read all three settings in one pass.
API_KEY, SECRET_KEY, OAUTH_TOKEN = map(
    os.getenv, ('API_KEY', 'SECRET_KEY', 'OAUTH_TOKEN')
)

# Only API_KEY and SECRET_KEY are checked; OAUTH_TOKEN may be unset.
if not (API_KEY and SECRET_KEY):
    raise ValueError("Missing required authentication credentials")
The `requests` library makes authentication straightforward while providing flexibility for various schemes. Always prioritize security by using environment variables, implementing proper error handling, and following the principle of least privilege when accessing APIs.