How do I set custom headers and user agents in Selenium WebDriver?
Setting custom headers and user agents in Selenium WebDriver is essential for web scraping, testing, and automation tasks. While Selenium doesn't directly support custom HTTP headers, there are several effective approaches to achieve this functionality across different browsers.
Setting Custom User Agents
The user agent is the most commonly modified header in web scraping scenarios. Here's how to set custom user agents for different browsers:
Chrome WebDriver
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def create_chrome_driver_with_user_agent(user_agent):
    """Start Chrome with its User-Agent replaced by *user_agent*.

    The value is handed to the browser through the ``--user-agent``
    command-line switch. The driver is returned still running; closing it
    is left to the caller.
    """
    opts = Options()
    ua_switch = "--user-agent={}".format(user_agent)
    opts.add_argument(ua_switch)
    return webdriver.Chrome(options=opts)
# Example usage
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
driver = create_chrome_driver_with_user_agent(custom_user_agent)
try:
    # httpbin echoes back the User-Agent it received.
    driver.get("https://httpbin.org/user-agent")
    print(driver.page_source)
finally:
    # Close the browser even if navigation raises, so no process is leaked.
    driver.quit()
Firefox WebDriver
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
def create_firefox_driver_with_user_agent(user_agent):
    """Start Firefox with its User-Agent replaced by *user_agent*.

    The override is applied through the ``general.useragent.override``
    preference. The driver is returned still running; closing it is left
    to the caller.
    """
    opts = Options()
    opts.set_preference("general.useragent.override", user_agent)
    return webdriver.Firefox(options=opts)
# Example usage
mobile_user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"
driver = create_firefox_driver_with_user_agent(mobile_user_agent)
try:
    # httpbin echoes back the User-Agent it received.
    driver.get("https://httpbin.org/user-agent")
    print(driver.page_source)
finally:
    # Close the browser even if navigation raises, so no process is leaked.
    driver.quit()
Edge WebDriver
from selenium import webdriver
from selenium.webdriver.edge.options import Options
def create_edge_driver_with_user_agent(user_agent):
    """Start Edge with its User-Agent replaced by *user_agent*.

    The value is handed to the browser through the ``--user-agent``
    command-line switch. The driver is returned still running; closing it
    is left to the caller.
    """
    opts = Options()
    ua_switch = "--user-agent={}".format(user_agent)
    opts.add_argument(ua_switch)
    return webdriver.Edge(options=opts)
# Example usage
bot_user_agent = "MyBot/1.0 (+https://example.com/bot)"
driver = create_edge_driver_with_user_agent(bot_user_agent)
try:
    # httpbin echoes back the User-Agent it received.
    driver.get("https://httpbin.org/user-agent")
    print(driver.page_source)
finally:
    # Close the browser even if navigation raises, so no process is leaked.
    driver.quit()
Setting Custom HTTP Headers
Since Selenium doesn't natively support custom HTTP headers, you need to use browser-specific approaches:
Method 1: Using Chrome DevTools Protocol (CDP)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json
def create_chrome_driver_with_headers(headers):
    """Start Chrome and apply *headers* to its requests via CDP.

    Args:
        headers: Mapping of header name to value. A ``'User-Agent'`` entry,
            if present, is applied with ``Network.setUserAgentOverride``;
            every other entry is sent with ``Network.setExtraHTTPHeaders``.

    Returns:
        The running Chrome driver. The caller is responsible for calling
        ``quit()`` on it.

    Raises:
        Exception: Whatever the underlying CDP call raised. The browser is
            shut down before the exception propagates.
    """
    chrome_options = Options()
    chrome_options.add_argument("--enable-logging")
    chrome_options.add_argument("--log-level=0")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Enable the Network domain before configuring it.
        driver.execute_cdp_cmd('Network.enable', {})

        # Only override the user agent when one was actually requested;
        # otherwise the browser default is left untouched and no extra
        # round-trip is made to read navigator.userAgent.
        user_agent = headers.get('User-Agent')
        if user_agent is not None:
            driver.execute_cdp_cmd('Network.setUserAgentOverride', {
                "userAgent": user_agent
            })

        # Everything except the user agent goes out as an extra header.
        extra_headers = {k: v for k, v in headers.items() if k != 'User-Agent'}
        if extra_headers:
            driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {"headers": extra_headers})
    except Exception:
        # Do not leak a browser process when configuration fails.
        driver.quit()
        raise
    return driver
# Example usage
custom_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'application/json',
    'Authorization': 'Bearer your-token-here',
    'X-Custom-Header': 'custom-value',
}
driver = create_chrome_driver_with_headers(custom_headers)
try:
    # httpbin echoes back the request headers it received.
    driver.get("https://httpbin.org/headers")
    print(driver.page_source)
finally:
    # Close the browser even if navigation raises, so no process is leaked.
    driver.quit()
Method 2: Using Browser Extensions
For more complex header manipulation, you can create a browser extension:
import json
import zipfile
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def create_header_extension(headers):
    """Package a Chrome extension that forces *headers* onto every request.

    The extension (Manifest V2) registers a blocking
    ``webRequest.onBeforeSendHeaders`` listener that overwrites a header
    when it is already present (matched case-insensitively) and appends it
    otherwise.

    NOTE(review): recent Chrome releases are phasing out Manifest V2
    extensions -- confirm the target browser still loads them.

    Args:
        headers: JSON-serializable mapping of header name to value.

    Returns:
        Relative path of the generated archive (``"custom_headers.crx"``).
        The unpacked sources are also left in ``custom_headers_extension/``
        under the current working directory.
    """
    manifest = {
        "manifest_version": 2,
        "name": "Custom Headers",
        "version": "1.0",
        "permissions": ["webRequest", "webRequestBlocking", "<all_urls>"],
        "background": {
            "scripts": ["background.js"],
            # The webRequest API cannot be used from a non-persistent
            # (event) background page, so this must be True.
            "persistent": True,
        },
    }
    # Braces are doubled because this is an f-string; only the header
    # mapping is interpolated, as a JSON object literal.
    background_js = f"""
var headers = {json.dumps(headers)};
chrome.webRequest.onBeforeSendHeaders.addListener(
  function(details) {{
    for (var key in headers) {{
      var found = false;
      for (var i = 0; i < details.requestHeaders.length; i++) {{
        if (details.requestHeaders[i].name.toLowerCase() === key.toLowerCase()) {{
          details.requestHeaders[i].value = headers[key];
          found = true;
          break;
        }}
      }}
      if (!found) {{
        details.requestHeaders.push({{
          name: key,
          value: headers[key]
        }});
      }}
    }}
    return {{requestHeaders: details.requestHeaders}};
  }},
  {{urls: ["<all_urls>"]}},
  ["blocking", "requestHeaders", "extraHeaders"]
);
"""
    # "extraHeaders" above is required for Chrome to let the listener modify
    # headers such as Accept-Language, Referer and Cookie.

    # Create extension files
    extension_dir = "custom_headers_extension"
    os.makedirs(extension_dir, exist_ok=True)
    manifest_path = os.path.join(extension_dir, "manifest.json")
    background_path = os.path.join(extension_dir, "background.js")
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f)
    with open(background_path, "w", encoding="utf-8") as f:
        f.write(background_js)

    # Create zip file (a plain zip archive, despite the .crx name)
    archive_path = "custom_headers.crx"
    with zipfile.ZipFile(archive_path, "w") as zf:
        zf.write(manifest_path, "manifest.json")
        zf.write(background_path, "background.js")
    return archive_path
def create_driver_with_extension_headers(headers):
    """Start Chrome with a generated header-injecting extension installed.

    The extension archive is built from *headers* by
    ``create_header_extension`` and registered on the Chrome options before
    launch. The driver is returned still running; closing it is left to the
    caller.
    """
    opts = Options()
    opts.add_extension(create_header_extension(headers))
    return webdriver.Chrome(options=opts)
# Example usage
headers = {
    'User-Agent': 'Custom Bot 1.0',
    'Accept': 'text/html,application/xhtml+xml',
    'Accept-Language': 'en-US,en;q=0.9',
    'X-Forwarded-For': '192.168.1.1',
}
driver = create_driver_with_extension_headers(headers)
try:
    # httpbin echoes back the request headers it received.
    driver.get("https://httpbin.org/headers")
    print(driver.page_source)
finally:
    # Close the browser even if navigation raises, so no process is leaked.
    driver.quit()
JavaScript Implementation
For JavaScript users, here's how to set custom headers and user agents:
const { Builder } = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
/**
 * Start Chrome and apply the given request headers.
 *
 * A 'User-Agent' entry is applied through the --user-agent command-line
 * switch; every other entry is sent with the CDP command
 * Network.setExtraHTTPHeaders.
 *
 * @param {Object<string, string>} headers Header name to value mapping.
 * @returns {Promise<WebDriver>} The running driver; the caller must quit() it.
 * @throws Rethrows any error from the CDP calls after shutting the browser down.
 */
async function createChromeDriverWithHeaders(headers) {
  const options = new chrome.Options();

  // Set user agent
  if (headers['User-Agent']) {
    options.addArguments(`--user-agent=${headers['User-Agent']}`);
  }

  const driver = await new Builder()
    .forBrowser('chrome')
    .setChromeOptions(options)
    .build();

  // Everything except the user agent goes out as an extra header.
  const extraHeaders = {};
  for (const [name, value] of Object.entries(headers)) {
    if (name !== 'User-Agent') {
      extraHeaders[name] = value;
    }
  }

  try {
    // driver.execute() expects a Command object, not a command name, so the
    // Chromium driver's sendDevToolsCommand() helper is used for CDP calls.
    await driver.sendDevToolsCommand('Network.enable', {});

    if (Object.keys(extraHeaders).length > 0) {
      await driver.sendDevToolsCommand('Network.setExtraHTTPHeaders', {
        headers: extraHeaders
      });
    }
  } catch (err) {
    // Do not leak a browser process when configuration fails.
    await driver.quit();
    throw err;
  }

  return driver;
}
// Example usage
async function example() {
  const customHeaders = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
    'Accept': 'application/json',
    'X-API-Key': 'your-api-key'
  };

  const driver = await createChromeDriverWithHeaders(customHeaders);
  try {
    // httpbin echoes back the request headers it received.
    await driver.get('https://httpbin.org/headers');
    const pageSource = await driver.getPageSource();
    console.log(pageSource);
  } finally {
    // Close the browser even if navigation fails, so no process is leaked.
    await driver.quit();
  }
}

example().catch(console.error);
Advanced Header Configuration
Dynamic Header Modification
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
class DynamicHeaderDriver:
    """Chrome driver wrapper whose request headers can be changed between page loads.

    Headers are applied through CDP: the ``'User-Agent'`` entry via
    ``Network.setUserAgentOverride`` and all other entries via
    ``Network.setExtraHTTPHeaders``. The wrapper can be used as a context
    manager, in which case the browser is closed on exit.
    """

    def __init__(self):
        chrome_options = Options()
        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.execute_cdp_cmd('Network.enable', {})
        except Exception:
            # Do not leak a browser process when CDP setup fails.
            self.driver.quit()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.quit()
        return False

    def set_headers(self, headers):
        """Set headers for subsequent requests.

        The extra (non User-Agent) headers are always sent to the browser,
        even when empty, so that headers from an earlier call (for example
        ``Authorization``) do not linger on later requests. A User-Agent
        override is only changed when *headers* contains a truthy
        ``'User-Agent'`` value; otherwise any earlier override stays active.
        """
        user_agent = headers.get('User-Agent')
        if user_agent:
            self.driver.execute_cdp_cmd('Network.setUserAgentOverride', {
                "userAgent": user_agent
            })
        extra_headers = {k: v for k, v in headers.items() if k != 'User-Agent'}
        self.driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {
            "headers": extra_headers
        })

    def get_page(self, url, headers=None):
        """Navigate to *url* and return the page source.

        When *headers* is given (including an empty mapping, which clears
        the extra headers) it is applied before navigating; ``None`` leaves
        the currently configured headers in place.
        """
        if headers is not None:
            self.set_headers(headers)
        self.driver.get(url)
        return self.driver.page_source

    def quit(self):
        """Close the underlying browser."""
        self.driver.quit()
# Example usage
driver = DynamicHeaderDriver()
try:
    # First request with API headers
    api_headers = {
        'User-Agent': 'API Client 1.0',
        'Authorization': 'Bearer token123',
        'Content-Type': 'application/json',
    }
    response1 = driver.get_page('https://httpbin.org/headers', api_headers)
    # Second request with different headers
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    response2 = driver.get_page('https://httpbin.org/headers', browser_headers)
finally:
    # Close the browser even if a request raises, so no process is leaked.
    driver.quit()
Best Practices and Considerations
1. Header Validation
Always validate that your headers are being sent correctly:
def validate_headers(driver, expected_headers):
    """Validate that headers are being sent correctly.

    Loads ``https://httpbin.org/headers`` in *driver*, parses the JSON shown
    in the page's ``<pre>`` element and compares it with *expected_headers*.
    A line is printed per header reporting whether it matched.

    Args:
        driver: Object providing ``get(url)`` and ``find_element(by, value)``.
        expected_headers: Mapping of header name to the expected value.

    Returns:
        ``True`` when every expected header matched, ``False`` otherwise.
    """
    import json

    driver.get("https://httpbin.org/headers")
    response = json.loads(driver.find_element("tag name", "pre").text)
    # HTTP header names are case-insensitive, and the echoed names may use a
    # different capitalization than the caller did, so compare lower-cased.
    actual_headers = {
        name.lower(): value
        for name, value in response.get("headers", {}).items()
    }
    all_match = True
    for key, expected_value in expected_headers.items():
        actual_value = actual_headers.get(key.lower(), "")
        if actual_value != expected_value:
            print(f"Header mismatch - {key}: expected '{expected_value}', got '{actual_value}'")
            all_match = False
        else:
            print(f"Header correct - {key}: {actual_value}")
    return all_match
2. Error Handling
Implement proper error handling for header operations:
def safe_set_headers(driver, headers):
    """Apply *headers* to *driver* via CDP, reporting failure instead of raising.

    The Network domain is enabled first. A ``'User-Agent'`` entry is applied
    with ``Network.setUserAgentOverride``; any remaining entries are sent
    with ``Network.setExtraHTTPHeaders``.

    Returns:
        ``True`` if every step succeeded. If anything raises, the error is
        printed and ``False`` is returned.
    """
    try:
        send_cdp = driver.execute_cdp_cmd
        send_cdp('Network.enable', {})

        if 'User-Agent' in headers:
            ua_params = {"userAgent": headers['User-Agent']}
            send_cdp('Network.setUserAgentOverride', ua_params)

        others = dict(item for item in headers.items() if item[0] != 'User-Agent')
        if others:
            send_cdp('Network.setExtraHTTPHeaders', {"headers": others})
    except Exception as exc:
        print(f"Error setting headers: {exc}")
        return False
    return True
3. Performance Considerations
When dealing with multiple requests, consider using browser session management techniques to maintain headers across requests while optimizing performance.
Common Use Cases
Setting custom headers and user agents is particularly useful for:
- API Testing: Setting authorization headers and content types
- Mobile Testing: Emulating mobile devices with appropriate user agents
- Geo-targeting: Adding location-based headers
- A/B Testing: Sending custom experiment headers
- Rate-Limited APIs: Supplying the API keys and authentication tokens that identify your client
Troubleshooting Common Issues
Headers Not Being Applied
If headers aren't being applied, ensure that:
1. CDP commands are supported (Chromium-based browsers such as Chrome and Edge only).
2. The Network domain is enabled before setting headers.
3. Headers are set before navigating to the page.
Browser Compatibility
Different browsers have varying levels of support for custom headers. Chrome and Edge offer the most flexibility through CDP, while Firefox requires different approaches for certain headers.
Conclusion
Setting custom headers and user agents in Selenium WebDriver requires browser-specific approaches, with Chrome DevTools Protocol being the most versatile solution. While Selenium doesn't natively support all HTTP headers, the methods outlined above provide comprehensive coverage for most web scraping and testing scenarios.
For more complex scenarios involving dynamic content, consider exploring authentication handling techniques and network request monitoring to ensure your headers are working correctly across different types of web applications.