Table of contents

How do I handle cookies with the Requests library?

Cookies are essential for maintaining state and authentication in web applications. The Python requests library provides multiple ways to handle cookies efficiently, whether you need to send existing cookies or manage cookies received from servers.

Method 1: Sending Cookies to the Server

Using the cookies Parameter (Recommended)

The most straightforward way to send cookies is using the cookies parameter:

import requests

url = 'https://httpbin.org/cookies'
cookies = {
    'session_token': '123456789',
    'user_id': 'john_doe',
    'preferences': 'dark_mode=true'
}

response = requests.get(url, cookies=cookies)
print(response.json())  # Server confirms received cookies

Using RequestsCookieJar for Advanced Cookie Control

For more control over cookie attributes like domain, path, and expiration:

import requests
from requests.cookies import RequestsCookieJar

jar = RequestsCookieJar()
jar.set('session_token', '123456789', domain='example.com', path='/')
jar.set('csrf_token', 'abc123', domain='example.com', path='/api/')

response = requests.get('https://example.com/api/data', cookies=jar)

Manual Cookie Headers

While less flexible, you can set cookies directly in headers:

import requests

headers = {
    'Cookie': 'session_token=123456789; user_id=john_doe; csrf_token=abc123',
    'User-Agent': 'Mozilla/5.0...'
}

response = requests.get('https://example.com/api', headers=headers)

Method 2: Handling Server-Set Cookies

Accessing Response Cookies

When servers set cookies, requests automatically stores them in the response object:

import requests

response = requests.get('https://httpbin.org/cookies/set/test_cookie/cookie_value')

# Access individual cookies
for cookie in response.cookies:
    print(f"{cookie.name}: {cookie.value}")
    print(f"Domain: {cookie.domain}, Path: {cookie.path}")
    print(f"Secure: {cookie.secure}, HttpOnly: {cookie.has_nonstandard_attr('HttpOnly')}")

# Convert to dictionary for easier access
cookie_dict = dict(response.cookies)
print(cookie_dict)

Extracting Specific Cookie Information

import requests

response = requests.get('https://example.com/login')

# Check if a specific cookie exists
if 'session_id' in response.cookies:
    session_id = response.cookies['session_id']
    print(f"Session ID: {session_id}")

# Get cookie with additional attributes
session_cookie = response.cookies.get('session_id')
if session_cookie:
    print(f"Expires: {session_cookie.expires}")
    print(f"Secure: {session_cookie.secure}")

Method 3: Session-Based Cookie Management (Recommended)

Using requests.Session() is the best approach for maintaining cookies across multiple requests:

import requests

# Create a persistent session
session = requests.Session()

# Login and receive authentication cookies
login_data = {'username': 'user', 'password': 'pass'}
login_response = session.post('https://example.com/login', data=login_data)

# Cookies are automatically stored in the session
print("Session cookies:", dict(session.cookies))

# Subsequent requests automatically include stored cookies
profile_response = session.get('https://example.com/profile')
api_response = session.get('https://example.com/api/user-data')

# Add additional cookies to the session
session.cookies.set('preference', 'dark_theme')

Real-World Example: Web Scraping with Authentication

import requests

def scrape_with_login(username, password):
    """Log in to example.com and return the HTML of the dashboard page.

    Uses a single Session so the authentication cookies set during login
    are automatically sent with the follow-up request.

    Raises:
        Exception: if the login POST does not return HTTP 200.
    """
    session = requests.Session()

    # Fetch the login form first so the server can set CSRF/session cookies.
    form_page = session.get('https://example.com/login')

    credentials = {
        'username': username,
        'password': password,
        'csrf_token': extract_csrf_token(form_page.text)  # Custom function
    }

    result = session.post('https://example.com/login', data=credentials)

    if result.status_code != 200:
        raise Exception("Login failed")

    # Authenticated: the session now carries the login cookies.
    dashboard = session.get('https://example.com/dashboard')
    return dashboard.text

def extract_csrf_token(html, field_name='csrf_token'):
    """Extract a hidden-input CSRF token from an HTML page.

    The original article left this as a stub; this implementation covers
    the common case of a token stored in an ``<input>`` tag, handling
    both attribute orders (name before value and value before name).

    Args:
        html: HTML source of the page (e.g. the login form).
        field_name: The input's ``name`` attribute to look for
            (default: ``'csrf_token'``).

    Returns:
        The token string, or None when no matching input is found
        (matching the stub's previous behavior of returning None).
    """
    import re  # local import keeps this snippet self-contained

    name = re.escape(field_name)
    # <input ... name="field" ... value="token" ...>
    match = re.search(
        r'<input[^>]*\bname=["\']' + name + r'["\'][^>]*\bvalue=["\']([^"\']*)["\']',
        html,
        re.IGNORECASE,
    )
    if match is None:
        # <input ... value="token" ... name="field" ...>
        match = re.search(
            r'<input[^>]*\bvalue=["\']([^"\']*)["\'][^>]*\bname=["\']' + name + r'["\']',
            html,
            re.IGNORECASE,
        )
    return match.group(1) if match else None

Method 4: Persistent Cookie Storage

Saving Cookies with Pickle

import requests
import pickle
from pathlib import Path

def save_session_cookies(session, filepath):
    """Serialize the session's cookie jar to *filepath* with pickle."""
    with open(filepath, 'wb') as handle:
        pickle.dump(session.cookies, handle)

def load_session_cookies(session, filepath):
    """Merge pickled cookies from *filepath* into the session's jar.

    Returns True when the file existed and was loaded, False otherwise.
    """
    cookie_file = Path(filepath)
    if not cookie_file.exists():
        return False
    with cookie_file.open('rb') as handle:
        session.cookies.update(pickle.load(handle))
    return True

# Usage example
session = requests.Session()

# Try to load existing cookies
if not load_session_cookies(session, 'cookies.pkl'):
    # No saved cookies, need to login
    login_response = session.post('https://example.com/login', 
                                data={'user': 'john', 'pass': 'secret'})
    # Save cookies after successful login
    save_session_cookies(session, 'cookies.pkl')

# Use the session with loaded/saved cookies
response = session.get('https://example.com/protected-resource')

JSON-Based Cookie Storage (Human-Readable)

import requests
import json

def save_cookies_json(cookies, filepath):
    """Write each cookie's value, domain and path to *filepath* as JSON."""
    serializable = {
        c.name: {'value': c.value, 'domain': c.domain, 'path': c.path}
        for c in cookies
    }
    with open(filepath, 'w') as handle:
        json.dump(serializable, handle, indent=2)

def load_cookies_json(session, filepath):
    """Load cookies from a JSON file produced by save_cookies_json.

    Returns True on success, False when the file does not exist.
    """
    try:
        with open(filepath, 'r') as handle:
            stored = json.load(handle)
    except FileNotFoundError:
        return False

    for name, attrs in stored.items():
        session.cookies.set(name, attrs['value'],
                            domain=attrs['domain'],
                            path=attrs['path'])
    return True

Advanced Cookie Techniques

Cookie-Based Rate Limiting and Retry Logic

import requests
import time

def make_request_with_retry(session, url, max_retries=3):
    """GET *url* via *session*, retrying on request errors with exponential backoff.

    Also inspects a ``rate_limit_remaining`` cookie (if the server sets one)
    and sleeps briefly when the remaining quota is low.

    Args:
        session: A requests.Session (or compatible) object.
        url: The URL to fetch.
        max_retries: Maximum number of attempts; must be >= 1.

    Returns:
        The successful response object.

    Raises:
        ValueError: if max_retries < 1 (the original version silently
            returned None in that case).
        requests.RequestException: re-raised once all attempts fail.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            response = session.get(url, timeout=10)
        except requests.RequestException:
            if attempt == max_retries - 1:
                raise  # bare raise keeps the original traceback intact
            time.sleep(2 ** attempt)  # Exponential backoff
            continue

        # Check for rate limiting cookies
        if 'rate_limit_remaining' in response.cookies:
            remaining = int(response.cookies['rate_limit_remaining'])
            if remaining < 5:  # Low rate limit
                time.sleep(2)  # Wait before next request

        return response

Debugging Cookie Issues

import requests
import logging

# Enable detailed logging for debugging
logging.basicConfig(level=logging.DEBUG)

session = requests.Session()

# Add request/response hooks for cookie debugging
def log_cookies(response, *args, **kwargs):
    """Response hook: print the cookies the server set and those we sent."""
    received = dict(response.cookies)
    # NOTE(review): _cookies is a private requests attribute — may change
    # between library versions; fine for ad-hoc debugging only.
    sent = dict(response.request._cookies)
    print(f"Response cookies: {received}")
    print(f"Request cookies: {sent}")

session.hooks['response'].append(log_cookies)

response = session.get('https://httpbin.org/cookies/set/debug/true')

Best Practices and Security Considerations

  1. Always use Sessions for multiple requests to the same site
  2. Validate cookie security - check for Secure and HttpOnly flags
  3. Handle cookie expiration by checking expiry dates
  4. Respect robots.txt and website terms of service
  5. Use HTTPS when handling authentication cookies
  6. Clear sensitive cookies after use to prevent security issues
# Example: Secure cookie handling
def secure_request_session():
    """Create a requests.Session preconfigured with polite default headers."""
    session = requests.Session()

    default_headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; MyBot/1.0)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }
    session.headers.update(default_headers)

    return session

# Always clean up sensitive data
def cleanup_session(session):
    """Remove authentication-related cookies from *session* after use."""
    for name in ('session_token', 'auth_token', 'csrf_token'):
        if name in session.cookies:
            del session.cookies[name]

Remember to always respect website terms of service and implement appropriate delays between requests to avoid overwhelming servers.

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"

Try in request builder

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon