How do you handle API rate limiting when scraping websites?
API rate limiting is one of the most common challenges developers face when scraping websites or consuming APIs at scale. Rate limiting is a protective mechanism that servers use to prevent abuse and ensure service availability for all users. Understanding how to handle these limits properly is crucial for building robust and respectful web scraping applications.
Understanding Rate Limiting
Rate limiting restricts the number of requests a client can make within a specific time window. Common rate limiting patterns include:
- Requests per second/minute/hour: A fixed number of requests allowed in a time period (a fixed-window sketch follows this list)
- Burst limits: Allow temporary spikes with a recovery period
- Concurrent request limits: Maximum simultaneous connections
- IP-based or token-based limits: Different limits for different authentication levels
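As a minimal sketch of the fixed-window pattern from the first bullet, the counter below simply resets at the start of each window. The class name and the limit/window values are placeholders for illustration:

import time

class FixedWindowLimiter:
    def __init__(self, limit, window_seconds):
        self.limit = limit
        self.window_seconds = window_seconds
        self.window_start = time.time()
        self.count = 0

    def allow(self):
        now = time.time()
        # Start a fresh window once the current one has expired
        if now - self.window_start >= self.window_seconds:
            self.window_start = now
            self.count = 0
        if self.count < self.limit:
            self.count += 1
            return True
        return False

# Usage: allow at most 60 requests per minute
limiter = FixedWindowLimiter(limit=60, window_seconds=60)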
Common Rate Limiting HTTP Status Codes
When you exceed rate limits, servers typically respond with specific HTTP status codes, which the sketch after this list translates into wait times:
- 429 Too Many Requests: The standard rate limiting response
- 503 Service Unavailable: Server temporarily overloaded
- 420 Enhance Your Calm: a non-standard code historically returned by Twitter's API
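These responses can be translated into a concrete wait time before retrying. The helper below is a sketch: backoff_seconds is a hypothetical name, and the five-second fallback is an arbitrary default, not a standard:

def backoff_seconds(response, default_wait=5):
    # Honor Retry-After when it is given in seconds; it may also be an
    # HTTP date, which this sketch does not attempt to parse.
    if response.status_code in (429, 503, 420):
        retry_after = response.headers.get('Retry-After')
        if retry_after and retry_after.isdigit():
            return int(retry_after)
        return default_wait
    return 0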
Implementing Rate Limiting Strategies
1. Exponential Backoff Algorithm
Exponential backoff is a standard approach where you progressively increase the delay between retries:
import time
import random
import requests

class RateLimitHandler:
    def __init__(self, base_delay=1, max_delay=60, backoff_factor=2):
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor

    def make_request_with_backoff(self, url, max_retries=5):
        delay = self.base_delay
        for attempt in range(max_retries):
            try:
                response = requests.get(url, timeout=30)
                if response.status_code == 200:
                    return response
                elif response.status_code == 429:
                    # Prefer the server-provided Retry-After header when present
                    retry_after = response.headers.get('Retry-After')
                    if retry_after:
                        delay = int(retry_after)
                    else:
                        # Add jitter to prevent a thundering herd of retries
                        jitter = random.uniform(0.1, 0.3) * delay
                        delay = min(delay * self.backoff_factor + jitter, self.max_delay)
                    print(f"Rate limited. Waiting {delay:.1f} seconds...")
                    time.sleep(delay)
                else:
                    response.raise_for_status()
            except requests.exceptions.RequestException:
                if attempt == max_retries - 1:
                    raise
                time.sleep(delay)
                delay = min(delay * self.backoff_factor, self.max_delay)
        raise Exception(f"Failed to complete request after {max_retries} attempts")
# Usage
handler = RateLimitHandler()
response = handler.make_request_with_backoff('https://api.example.com/data')
2. Token Bucket Algorithm
The token bucket algorithm is excellent for maintaining a steady request rate:
import time
import threading

class TokenBucket:
    def __init__(self, capacity, refill_rate):
        self.capacity = capacity          # maximum tokens the bucket holds
        self.tokens = capacity
        self.refill_rate = refill_rate    # tokens added per second
        self.last_refill = time.time()
        self.lock = threading.Lock()

    def consume(self, tokens=1):
        with self.lock:
            now = time.time()
            # Add tokens based on the time elapsed since the last refill
            tokens_to_add = (now - self.last_refill) * self.refill_rate
            self.tokens = min(self.capacity, self.tokens + tokens_to_add)
            self.last_refill = now
            if self.tokens >= tokens:
                self.tokens -= tokens
                return True
            return False

    def wait_for_token(self, tokens=1):
        while not self.consume(tokens):
            time.sleep(0.1)

# Usage with requests
bucket = TokenBucket(capacity=10, refill_rate=2)  # 2 tokens per second

def rate_limited_request(url):
    bucket.wait_for_token()
    return requests.get(url)
3. JavaScript Implementation with Async/Await
For Node.js applications, here's a rate-limited request handler:
const fetch = require('node-fetch');

class RateLimiter {
  constructor(requestsPerSecond = 1) {
    this.requestsPerSecond = requestsPerSecond;
    this.lastRequestTime = 0;
    this.queue = [];
    this.processing = false;
  }

  async makeRequest(url, options = {}) {
    return new Promise((resolve, reject) => {
      this.queue.push({ url, options, resolve, reject });
      this.processQueue();
    });
  }

  async processQueue() {
    if (this.processing || this.queue.length === 0) return;
    this.processing = true;
    while (this.queue.length > 0) {
      const now = Date.now();
      const timeSinceLastRequest = now - this.lastRequestTime;
      const minInterval = 1000 / this.requestsPerSecond;
      if (timeSinceLastRequest < minInterval) {
        await this.sleep(minInterval - timeSinceLastRequest);
      }
      const { url, options, resolve, reject } = this.queue.shift();
      this.lastRequestTime = Date.now();
      try {
        const response = await this.executeRequest(url, options);
        resolve(response);
      } catch (error) {
        if (error.response && error.response.status === 429) {
          // Re-queue the request after the server-suggested delay
          const retryAfter = error.response.headers.get('retry-after');
          const delay = retryAfter ? parseInt(retryAfter, 10) * 1000 : 5000;
          setTimeout(() => {
            this.queue.unshift({ url, options, resolve, reject });
            this.processQueue(); // restart the loop if it has already drained
          }, delay);
        } else {
          reject(error);
        }
      }
    }
    this.processing = false;
  }

  async executeRequest(url, options) {
    const response = await fetch(url, options);
    if (response.status === 429) {
      const error = new Error('Rate limited');
      error.response = response;
      throw error;
    }
    return response;
  }

  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
// Usage
const limiter = new RateLimiter(2); // 2 requests per second

async function scrapeWithRateLimit() {
  try {
    const response = await limiter.makeRequest('https://api.example.com/data');
    const data = await response.json();
    console.log(data);
  } catch (error) {
    console.error('Request failed:', error.message);
  }
}
Advanced Rate Limiting Techniques
1. Distributed Rate Limiting
For applications running across multiple servers, implement distributed rate limiting using Redis:
import redis
import time

class DistributedRateLimiter:
    def __init__(self, redis_client, key_prefix="rate_limit"):
        self.redis = redis_client
        self.key_prefix = key_prefix

    def is_allowed(self, identifier, limit, window_seconds):
        key = f"{self.key_prefix}:{identifier}"
        pipe = self.redis.pipeline()
        now = time.time()
        # Drop entries that have fallen out of the sliding window
        pipe.zremrangebyscore(key, 0, now - window_seconds)
        # Count requests still inside the window
        pipe.zcard(key)
        # Record the current request (the timestamp doubles as the member;
        # use a unique ID instead if same-microsecond collisions matter)
        pipe.zadd(key, {str(now): now})
        # Expire the key so idle identifiers get cleaned up
        pipe.expire(key, window_seconds)
        results = pipe.execute()
        current_requests = results[1]
        return current_requests < limit

# Usage
redis_client = redis.Redis(host='localhost', port=6379, db=0)
limiter = DistributedRateLimiter(redis_client)

if limiter.is_allowed("user_123", limit=100, window_seconds=3600):
    # Make the request
    response = requests.get(url)
else:
    print("Rate limit exceeded")
2. Adaptive Rate Limiting
Implement adaptive rate limiting that adjusts based on server responses:
class AdaptiveRateLimiter:
    def __init__(self, initial_rate=1.0, min_rate=0.1, max_rate=10.0):
        self.current_rate = initial_rate
        self.min_rate = min_rate
        self.max_rate = max_rate
        self.success_count = 0
        self.last_request_time = 0

    def adjust_rate(self, success):
        if success:
            self.success_count += 1
            # Gradually increase rate after consecutive successes
            if self.success_count >= 10:
                self.current_rate = min(self.current_rate * 1.1, self.max_rate)
                self.success_count = 0
        else:
            # Immediately reduce rate on failure
            self.current_rate = max(self.current_rate * 0.5, self.min_rate)
            self.success_count = 0

    def wait_if_needed(self):
        now = time.time()
        elapsed = now - self.last_request_time
        required_interval = 1.0 / self.current_rate
        if elapsed < required_interval:
            time.sleep(required_interval - elapsed)
        self.last_request_time = time.time()

    def make_request(self, url):
        self.wait_if_needed()
        try:
            response = requests.get(url, timeout=30)
            success = response.status_code != 429
            self.adjust_rate(success)
            return response
        except requests.exceptions.RequestException:
            self.adjust_rate(False)
            raise
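A usage sketch (the paginated URL is a placeholder; time and requests are imported as in the earlier examples):

# Usage
limiter = AdaptiveRateLimiter(initial_rate=2.0)
for page in range(1, 4):
    response = limiter.make_request(f'https://api.example.com/data?page={page}')
    print(response.status_code, f"rate is now {limiter.current_rate:.2f} req/s")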
Respecting Server-Provided Rate Limit Information
Many APIs report rate limit state in response headers. The X-RateLimit-* names below are a common convention rather than a standard, so check your target API's documentation, and always respect these headers:
def parse_rate_limit_headers(response):
    headers = response.headers
    rate_limit_info = {
        'limit': headers.get('X-RateLimit-Limit'),
        'remaining': headers.get('X-RateLimit-Remaining'),
        'reset': headers.get('X-RateLimit-Reset'),
        'retry_after': headers.get('Retry-After')
    }
    return rate_limit_info

def smart_request(url):
    response = requests.get(url)
    rate_info = parse_rate_limit_headers(response)
    if rate_info['remaining'] and int(rate_info['remaining']) < 5:
        # Slow down when approaching the limit
        time.sleep(2)
    return response
Integration with Web Scraping Tools
When using browser automation tools like Puppeteer, you can implement rate limiting at the page navigation level. In complex scenarios, such as handling multiple browser sessions in Puppeteer, pacing navigations becomes even more important to avoid detection.
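Puppeteer itself is a Node.js library; as a sketch in this article's Python, the same navigation-level pacing looks like the following using Playwright, a comparable browser automation tool (the interval and URLs are placeholders, and Playwright must be installed separately):

import time
from playwright.sync_api import sync_playwright

MIN_NAV_INTERVAL = 3.0  # minimum seconds between page navigations

def paced_scrape(urls):
    last_nav = 0.0
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        for url in urls:
            # Enforce a minimum interval between navigations
            wait = MIN_NAV_INTERVAL - (time.time() - last_nav)
            if wait > 0:
                time.sleep(wait)
            page.goto(url)
            last_nav = time.time()
            # ... extract data from the page here ...
        browser.close()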
Best Practices and Recommendations
1. Monitor and Log Rate Limit Events
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def log_rate_limit_event(response, retry_count=0):
    logger.warning(f"Rate limited. Status: {response.status_code}, "
                   f"Retry attempt: {retry_count}, "
                   f"Headers: {dict(response.headers)}")
2. Implement Circuit Breaker Pattern
from enum import Enum
import time

class CircuitState(Enum):
    CLOSED = 1
    OPEN = 2
    HALF_OPEN = 3

class CircuitBreaker:
    def __init__(self, failure_threshold=5, timeout=60):
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.failure_count = 0
        self.last_failure_time = None
        self.state = CircuitState.CLOSED

    def call(self, func, *args, **kwargs):
        if self.state == CircuitState.OPEN:
            # Allow a trial request once the cooldown period has passed
            if time.time() - self.last_failure_time > self.timeout:
                self.state = CircuitState.HALF_OPEN
            else:
                raise Exception("Circuit breaker is OPEN")
        try:
            result = func(*args, **kwargs)
            self.on_success()
            return result
        except Exception:
            self.on_failure()
            raise

    def on_success(self):
        self.failure_count = 0
        self.state = CircuitState.CLOSED

    def on_failure(self):
        self.failure_count += 1
        self.last_failure_time = time.time()
        if self.failure_count >= self.failure_threshold:
            self.state = CircuitState.OPEN
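A usage sketch that wraps a request in the breaker (the endpoint is a placeholder; requests is imported as in the earlier examples):

# Usage
breaker = CircuitBreaker(failure_threshold=3, timeout=30)

def fetch_data():
    response = requests.get('https://api.example.com/data', timeout=30)
    response.raise_for_status()
    return response

try:
    response = breaker.call(fetch_data)
except Exception as e:
    print(f"Request blocked or failed: {e}")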
3. Use Connection Pooling
Configure your HTTP client to use connection pooling for better performance:
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_session_with_retries():
    session = requests.Session()
    retry_strategy = Retry(
        total=3,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS"],  # named method_whitelist in urllib3 < 1.26
        backoff_factor=1
    )
    adapter = HTTPAdapter(
        pool_connections=10,
        pool_maxsize=10,
        max_retries=retry_strategy
    )
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
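The session is then a drop-in replacement for bare requests calls, with retries and pooling applied automatically (the URL is a placeholder):

# Usage
session = create_session_with_retries()
response = session.get('https://api.example.com/data', timeout=30)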
Testing Rate Limiting Implementation
Create unit tests to verify your rate limiting logic:
import unittest
from unittest.mock import patch, MagicMock
import time

class TestRateLimiting(unittest.TestCase):
    def test_token_bucket_consumption(self):
        bucket = TokenBucket(capacity=5, refill_rate=1)
        # Should allow initial requests
        self.assertTrue(bucket.consume(3))
        self.assertTrue(bucket.consume(2))
        # Should deny when empty
        self.assertFalse(bucket.consume(1))

    @patch('time.sleep')
    def test_exponential_backoff(self, mock_sleep):
        handler = RateLimitHandler(base_delay=1, backoff_factor=2)
        with patch('requests.get') as mock_get:
            # Simulate two rate-limited responses, then success
            mock_response = MagicMock(status_code=429)
            mock_response.headers = {}  # no Retry-After header
            mock_get.side_effect = [mock_response, mock_response,
                                    MagicMock(status_code=200)]
            response = handler.make_request_with_backoff('http://test.com')
            self.assertEqual(response.status_code, 200)
            # Verify that backoff delays were applied
            self.assertEqual(mock_sleep.call_count, 2)

if __name__ == '__main__':
    unittest.main()
Conclusion
Handling API rate limiting effectively requires a combination of strategies tailored to your specific use case. The key principles are:
- Respect server limits: Always honor rate limit headers and responses
- Implement progressive backoff: Use exponential backoff with jitter
- Monitor and adapt: Track your request patterns and adjust accordingly
- Plan for failures: Implement circuit breakers and proper error handling
- Test thoroughly: Verify your rate limiting logic works under various conditions
In complex scraping scenarios, such as monitoring network requests in Puppeteer, these rate limiting strategies become even more important for keeping your operations reliable and respectful.
By implementing these techniques, you'll build more robust applications that can handle rate limiting gracefully while maintaining good relationships with the APIs and websites you're accessing.