Cookies are essential for maintaining state and authentication in web applications. The Python `requests` library provides multiple ways to handle cookies efficiently, whether you need to send existing cookies or manage cookies received from servers.
Method 1: Sending Cookies to the Server
Using the `cookies` Parameter (Recommended)
The most straightforward way to send cookies is using the `cookies` parameter:
import requests
# Cookies to send, as a plain name -> value mapping
cookies = dict(
    session_token='123456789',
    user_id='john_doe',
    preferences='dark_mode=true',
)
url = 'https://httpbin.org/cookies'

response = requests.get(url, cookies=cookies)
print(response.json())  # httpbin echoes back the cookies it received
Using RequestsCookieJar for Advanced Cookie Control
For more control over cookie attributes like domain, path, and expiration:
import requests
from requests.cookies import RequestsCookieJar
# Build a jar whose cookies are scoped to a domain and path
jar = RequestsCookieJar()
for cookie_name, cookie_value, cookie_path in (
    ('session_token', '123456789', '/'),
    ('csrf_token', 'abc123', '/api/'),
):
    jar.set(cookie_name, cookie_value, domain='example.com', path=cookie_path)

response = requests.get('https://example.com/api/data', cookies=jar)
Manual Cookie Headers
While less flexible, you can set cookies directly in headers:
import requests
# Hand-built Cookie header: "name=value" pairs separated by "; "
cookie_header = 'session_token=123456789; user_id=john_doe; csrf_token=abc123'
headers = {
    'Cookie': cookie_header,
    'User-Agent': 'Mozilla/5.0...',
}

response = requests.get('https://example.com/api', headers=headers)
Method 2: Handling Server-Set Cookies
Accessing Response Cookies
When servers set cookies, `requests` automatically stores them in the response object:
import requests
# httpbin sets the cookie on a redirect response. response.cookies only holds
# cookies set by the response it belongs to, so do not follow the redirect;
# otherwise the jar inspected below would be empty.
response = requests.get(
    'https://httpbin.org/cookies/set/test_cookie/cookie_value',
    allow_redirects=False,
)

# Access individual cookies (each item is a cookie object with attributes)
for cookie in response.cookies:
    print(f"{cookie.name}: {cookie.value}")
    print(f"Domain: {cookie.domain}, Path: {cookie.path}")
    print(f"Secure: {cookie.secure}, HttpOnly: {cookie.has_nonstandard_attr('HttpOnly')}")

# Convert to dictionary for easier access
cookie_dict = dict(response.cookies)
print(cookie_dict)
Extracting Specific Cookie Information
import requests
response = requests.get('https://example.com/login')

# Check if a specific cookie exists
if 'session_id' in response.cookies:
    session_id = response.cookies['session_id']
    print(f"Session ID: {session_id}")

# Get cookie with additional attributes.
# response.cookies.get() returns only the cookie's value (a string), which has
# no .expires or .secure; iterate the jar to obtain the cookie object itself.
session_cookie = next(
    (cookie for cookie in response.cookies if cookie.name == 'session_id'),
    None,
)
if session_cookie is not None:
    print(f"Expires: {session_cookie.expires}")
    print(f"Secure: {session_cookie.secure}")
Method 3: Session-Based Cookie Management (Recommended)
Using `requests.Session()` is the best approach for maintaining cookies across multiple requests:
import requests
# One Session object carries its cookie jar across every request made with it
session = requests.Session()

# Log in; cookies set by the server land in session.cookies
login_data = dict(username='user', password='pass')
login_response = session.post('https://example.com/login', data=login_data)

# Inspect what the session is holding
stored_cookies = dict(session.cookies)
print("Session cookies:", stored_cookies)

# Later requests send the stored cookies without any extra arguments
profile_response = session.get('https://example.com/profile')
api_response = session.get('https://example.com/api/user-data')

# Cookies can also be added to the session by hand
session.cookies.set('preference', 'dark_theme')
Real-World Example: Web Scraping with Authentication
import requests
def scrape_with_login(username, password):
    """Log in to example.com and return the dashboard page HTML.

    Args:
        username: Value submitted in the ``username`` form field.
        password: Value submitted in the ``password`` form field.

    Returns:
        The text of the response from ``https://example.com/dashboard``.

    Raises:
        Exception: If the login POST does not answer with HTTP 200.
    """
    # The context manager closes the session (and its pooled connections)
    # on every exit path, including a failed login.
    with requests.Session() as session:
        # Step 1: Get login page (may set CSRF tokens)
        login_page = session.get('https://example.com/login')

        # Step 2: Submit login credentials
        login_data = {
            'username': username,
            'password': password,
            'csrf_token': extract_csrf_token(login_page.text),  # Custom function
        }
        login_response = session.post('https://example.com/login', data=login_data)

        if login_response.status_code != 200:
            raise Exception("Login failed")

        # Step 3: Access protected content using the cookies stored in the session
        protected_content = session.get('https://example.com/dashboard')
        return protected_content.text
def extract_csrf_token(html):
    """Placeholder for pulling a CSRF token out of ``html``.

    Not implemented: always returns ``None``.
    """
    # Implementation depends on the website structure
    return None
Method 4: Persistent Cookie Storage
Saving Cookies with Pickle
import requests
import pickle
from pathlib import Path
def save_session_cookies(session, filepath):
    """Pickle ``session.cookies`` into the file at ``filepath``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    jar = session.cookies
    with open(filepath, mode='wb') as out_file:
        pickle.Pickler(out_file).dump(jar)
def load_session_cookies(session, filepath):
    """Load pickled cookies from ``filepath`` into ``session.cookies``.

    Args:
        session: Object whose ``cookies`` attribute supports ``update()``.
        filepath: Path of a file holding a pickled cookie collection.

    Returns:
        True if the file was read and the cookies were merged into the
        session, False if the file does not exist.
    """
    # Open directly and handle the missing-file case, instead of checking for
    # existence first: the file could disappear between the check and the open.
    try:
        with open(filepath, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code. Only load files
            # this program wrote itself; never a file from an untrusted source.
            cookies = pickle.load(f)
    except FileNotFoundError:
        return False
    session.cookies.update(cookies)
    return True
# Usage example
session = requests.Session()

# Reuse cookies from a previous run when a cookie file is present
restored = load_session_cookies(session, 'cookies.pkl')
if not restored:
    # Nothing saved yet: log in, then persist the cookies the session now holds
    credentials = {'user': 'john', 'pass': 'secret'}
    login_response = session.post('https://example.com/login', data=credentials)
    save_session_cookies(session, 'cookies.pkl')

# Either way, the session now sends the loaded or freshly saved cookies
response = session.get('https://example.com/protected-resource')
JSON-Based Cookie Storage (Human-Readable)
import requests
import json
def save_cookies_json(cookies, filepath):
    """Save cookies as JSON.

    Writes a JSON list with one record per cookie (``name``, ``value``,
    ``domain``, ``path``). A list is used rather than a mapping keyed by
    name so that cookies sharing a name on different domains or paths are
    all kept instead of overwriting each other.

    Args:
        cookies: Iterable of cookie objects exposing ``name``, ``value``,
            ``domain`` and ``path`` attributes.
        filepath: Destination file; existing content is replaced.
    """
    records = [
        {
            'name': cookie.name,
            'value': cookie.value,
            'domain': cookie.domain,
            'path': cookie.path,
        }
        for cookie in cookies
    ]
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(records, f, indent=2)


def load_cookies_json(session, filepath):
    """Load cookies from JSON file.

    Accepts the list-of-records layout written by ``save_cookies_json`` as
    well as the older layout: a mapping of cookie name to a
    ``{'value', 'domain', 'path'}`` object.

    Args:
        session: Object whose ``cookies.set(name, value, domain=, path=)``
            is called once per stored cookie.
        filepath: JSON file to read.

    Returns:
        True if the file was read and its cookies were set on the session,
        False if the file does not exist.
    """
    # Only the file access is guarded, so a FileNotFoundError raised by
    # anything else is not mistaken for "no saved cookies".
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        return False

    if isinstance(data, dict):
        # Older name-keyed layout: normalise it to a list of records.
        data = [{'name': name, **attrs} for name, attrs in data.items()]

    for record in data:
        session.cookies.set(
            record['name'],
            record['value'],
            domain=record['domain'],
            path=record['path'],
        )
    return True
Advanced Cookie Techniques
Cookie-Based Rate Limiting and Retry Logic
import requests
import time
def make_request_with_retry(session, url, max_retries=3):
    """GET ``url`` through ``session``, retrying on request errors.

    A failed attempt is followed by an exponential backoff sleep
    (1s, 2s, 4s, ...) before the next one. After a successful request, if
    the response carries a ``rate_limit_remaining`` cookie below 5, the
    function sleeps 2 seconds before returning, to slow the caller down.

    Args:
        session: Object providing ``get(url, timeout=...)``.
        url: Address to request.
        max_retries: Maximum number of attempts.

    Returns:
        The response of the first successful attempt. If ``max_retries`` is
        less than 1, no request is made and ``None`` is returned.

    Raises:
        requests.RequestException: Re-raised from the final failed attempt.
    """
    for attempt in range(max_retries):
        # Keep the try body to the network call only, so errors from the
        # cookie handling below are never treated as request failures.
        try:
            response = session.get(url, timeout=10)
        except requests.RequestException:
            if attempt == max_retries - 1:
                raise  # out of attempts: propagate with the original traceback
            time.sleep(2 ** attempt)  # Exponential backoff
            continue

        # Check for rate limiting cookies
        if 'rate_limit_remaining' in response.cookies:
            try:
                remaining = int(response.cookies['rate_limit_remaining'])
            except (TypeError, ValueError):
                # A malformed counter must not fail a request that succeeded.
                remaining = None
            if remaining is not None and remaining < 5:  # Low rate limit
                time.sleep(2)  # Wait before next request
        return response
Debugging Cookie Issues
import requests
import logging
# Enable detailed logging for debugging
logging.basicConfig(level=logging.DEBUG)

session = requests.Session()


# Add request/response hooks for cookie debugging
def log_cookies(response, *args, **kwargs):
    """Response hook: print the cookies a response set and those its request sent."""
    print(f"Response cookies: {dict(response.cookies)}")
    # Read the Cookie header of the prepared request (None when no cookies
    # were sent) instead of the private ``_cookies`` attribute, which is not
    # part of the public API.
    print(f"Request cookies: {response.request.headers.get('Cookie')}")


session.hooks['response'].append(log_cookies)
response = session.get('https://httpbin.org/cookies/set/debug/true')
Best Practices and Security Considerations
- Always use Sessions for multiple requests to the same site
- Validate cookie security - check for Secure and HttpOnly flags
- Handle cookie expiration by checking expiry dates
- Respect robots.txt and website terms of service
- Use HTTPS when handling authentication cookies
- Clear sensitive cookies after use to prevent security issues
# Example: Secure cookie handling
def secure_request_session():
    """Return a new ``requests.Session`` with default User-Agent and Accept headers set."""
    default_headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; MyBot/1.0)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }
    new_session = requests.Session()
    # Merge into the session's existing default headers rather than replacing them
    new_session.headers.update(default_headers)
    return new_session
# Always clean up sensitive data
def cleanup_session(session):
    """Delete the authentication-related cookies from ``session.cookies``.

    Removes ``session_token``, ``auth_token`` and ``csrf_token`` when present;
    every other cookie is left in place.
    """
    jar = session.cookies
    for name in ('session_token', 'auth_token', 'csrf_token'):
        if name not in jar:
            continue  # nothing stored under this name
        del jar[name]
Remember to always respect website terms of service and implement appropriate delays between requests to avoid overwhelming servers.