How to Configure Proxy Settings for Selenium WebDriver
Configuring proxy settings in Selenium WebDriver is essential for web scraping projects that require IP rotation, geographical targeting, or bypassing network restrictions. This comprehensive guide covers proxy configuration across different programming languages and browser drivers.
Understanding Proxy Types
Before diving into configuration, it's important to understand the different proxy types available:
- HTTP Proxy: Routes HTTP traffic through a proxy server
- HTTPS Proxy: Routes HTTPS traffic through a proxy server
- SOCKS Proxy: Routes traffic at the socket level, supporting both HTTP and HTTPS
- PAC (Proxy Auto-Config): Uses a script to automatically determine proxy settings
Python Selenium Proxy Configuration
Basic HTTP Proxy Setup
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.chrome.options import Options
# Method 1: Using the Proxy class
# The proxy is attached to the browser Options object. The older
# DesiredCapabilities / desired_capabilities route was removed in Selenium 4,
# and it also mutated the shared DesiredCapabilities.CHROME dictionary.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = "ip:port"
# The attribute for HTTPS traffic is named ssl_proxy; "https_proxy" is not a
# Proxy attribute and would be silently ignored.
proxy.ssl_proxy = "ip:port"
chrome_options = Options()
chrome_options.proxy = proxy
driver = webdriver.Chrome(options=chrome_options)
Chrome Options Proxy Configuration
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://proxy-server:port")
# For authenticated proxies: Chrome has no command-line switch for proxy
# credentials (a "--proxy-auth" argument is silently ignored). Use the
# extension-based approach from the "Authenticated Proxy Setup" section.
# Disable proxy for specific domains
chrome_options.add_argument("--proxy-bypass-list=localhost,127.0.0.1")
driver = webdriver.Chrome(options=chrome_options)
SOCKS Proxy Configuration
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType
# Describe a manually configured SOCKS5 proxy.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.socks_proxy = "ip:port"
proxy.socks_version = 5  # SOCKS5
# Attach the proxy through the Options object; the desired_capabilities
# keyword was removed in Selenium 4, and the old code also mutated the shared
# DesiredCapabilities.CHROME dictionary.
chrome_options = webdriver.ChromeOptions()
chrome_options.proxy = proxy
driver = webdriver.Chrome(options=chrome_options)
Firefox Proxy Configuration
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Firefox takes its proxy configuration from browser preferences, so the
# settings are collected in one mapping and applied in order.
firefox_options = Options()
proxy_preferences = {
    "network.proxy.type": 1,  # 1 = manual proxy configuration
    "network.proxy.http": "proxy-server",
    "network.proxy.http_port": 8080,
    "network.proxy.ssl": "proxy-server",
    "network.proxy.ssl_port": 8080,
}
for pref_name, pref_value in proxy_preferences.items():
    firefox_options.set_preference(pref_name, pref_value)
driver = webdriver.Firefox(options=firefox_options)
Java Selenium Proxy Configuration
Basic Proxy Setup in Java
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.Proxy;
/**
 * Starts Chrome with HTTP and HTTPS traffic routed through a manually
 * configured proxy server.
 */
public class ProxyConfiguration {
    public static void main(String[] args) {
        // Create proxy object
        Proxy proxy = new Proxy();
        proxy.setHttpProxy("proxy-server:port");
        proxy.setSslProxy("proxy-server:port");

        // Configure Chrome options
        ChromeOptions options = new ChromeOptions();
        options.setProxy(proxy);

        WebDriver driver = new ChromeDriver(options);
        try {
            // Your automation code here
        } finally {
            // Always close the browser, even when the automation code throws,
            // so no orphaned Chrome/chromedriver processes are left behind.
            driver.quit();
        }
    }
}
SOCKS Proxy in Java
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.Proxy;
// Describe the SOCKS5 proxy the browser should use.
Proxy socksProxy = new Proxy();
socksProxy.setSocksProxy("proxy-server:port");
socksProxy.setSocksVersion(5);

// Hand the proxy to Chrome through its options, then start the browser.
ChromeOptions chromeOptions = new ChromeOptions();
chromeOptions.setProxy(socksProxy);
WebDriver driver = new ChromeDriver(chromeOptions);
C# Selenium Proxy Configuration
Basic Proxy Setup in C#
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
/// <summary>
/// Starts Chrome with HTTP and HTTPS traffic routed through a proxy server.
/// </summary>
class Program
{
    static void Main()
    {
        var proxy = new Proxy();
        proxy.HttpProxy = "proxy-server:port";
        proxy.SslProxy = "proxy-server:port";

        var options = new ChromeOptions();
        options.Proxy = proxy;

        var driver = new ChromeDriver(options);
        try
        {
            // Your automation code here
        }
        finally
        {
            // Always close the browser, even when the automation code throws.
            driver.Quit();
        }
    }
}
Advanced Proxy Configuration Techniques
Rotating Proxies
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Candidate proxy endpoints, each written as "host:port".
proxy_list = ["proxy1:port1", "proxy2:port2", "proxy3:port3"]


def create_driver_with_proxy():
    """Start a Chrome session routed through a random entry of proxy_list."""
    selected_proxy = random.choice(proxy_list)
    options = Options()
    options.add_argument(f"--proxy-server=http://{selected_proxy}")
    return webdriver.Chrome(options=options)


# Use different proxy for each session
driver = create_driver_with_proxy()
Authenticated Proxy Setup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile
import os
def create_proxy_extension(proxy_host, proxy_port, username, password):
    """Build a Chrome extension archive that sets a proxy and answers its auth prompt.

    The extension files are written to the ``proxy_auth_extension`` directory
    and zipped into ``proxy_auth_extension.zip``, both relative to the current
    working directory.

    Args:
        proxy_host: Host name or IP address of the HTTP proxy.
        proxy_port: Proxy port, as an int or a numeric string.
        username: User name supplied when the proxy requests authentication.
        password: Password supplied when the proxy requests authentication.

    Returns:
        The path of the zip archive, ``"proxy_auth_extension.zip"``.

    Raises:
        ValueError: If ``proxy_port`` is not a valid integer.
    """
    import json  # local import keeps this snippet self-contained

    # NOTE(review): this is a Manifest V2 extension; recent Chrome releases are
    # phasing MV2 out - confirm it still loads on the Chrome version you target.
    manifest = {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking",
        ],
        "background": {"scripts": ["background.js"]},
    }

    proxy_config = {
        "mode": "fixed_servers",
        "rules": {
            "singleProxy": {
                "scheme": "http",
                "host": proxy_host,
                "port": int(proxy_port),
            },
            "bypassList": ["localhost"],
        },
    }
    credentials = {"username": username, "password": password}

    # json.dumps produces valid JavaScript literals, so quotes or backslashes
    # in the host, user name or password cannot break (or inject into) the
    # generated script the way raw string interpolation could.
    background_js = (
        f"var config = {json.dumps(proxy_config)};\n"
        'chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});\n'
        f"var credentials = {json.dumps(credentials)};\n"
        "function callbackFn(details) {\n"
        "    return {authCredentials: credentials};\n"
        "}\n"
        "chrome.webRequest.onAuthRequired.addListener(\n"
        "    callbackFn,\n"
        '    {urls: ["<all_urls>"]},\n'
        "    ['blocking']\n"
        ");\n"
    )

    # Create extension
    extension_dir = "proxy_auth_extension"
    os.makedirs(extension_dir, exist_ok=True)
    manifest_path = os.path.join(extension_dir, "manifest.json")
    background_path = os.path.join(extension_dir, "background.js")
    with open(manifest_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(manifest, indent=2))
    with open(background_path, "w", encoding="utf-8") as f:
        f.write(background_js)

    # Create zip file
    archive_path = "proxy_auth_extension.zip"
    with zipfile.ZipFile(archive_path, "w") as zf:
        zf.write(manifest_path, "manifest.json")
        zf.write(background_path, "background.js")
    return archive_path
# Usage
# Build the extension archive first, then register it on the Chrome options
# so the browser loads it when the session starts.
chrome_options = Options()
extension_path = create_proxy_extension("proxy-server", "8080", "username", "password")
chrome_options.add_extension(extension_path)
driver = webdriver.Chrome(options=chrome_options)
Proxy Configuration Best Practices
1. Connection Testing
def test_proxy_connection(proxy_url, timeout=None):
    """Check whether a proxy can load https://httpbin.org/ip in headless Chrome.

    Args:
        proxy_url: Proxy URL passed to Chrome's ``--proxy-server`` switch.
        timeout: Optional page-load timeout in seconds. ``None`` keeps the
            driver's default timeout.

    Returns:
        True if the page loaded and its source contains "origin", otherwise
        False (including when any exception occurs).
    """
    chrome_options = Options()
    chrome_options.add_argument(f"--proxy-server={proxy_url}")
    chrome_options.add_argument("--headless")
    driver = None
    try:
        driver = webdriver.Chrome(options=chrome_options)
        if timeout is not None:
            driver.set_page_load_timeout(timeout)
        driver.get("https://httpbin.org/ip")
        return "origin" in driver.page_source
    except Exception as e:
        # This is a best-effort probe: any failure means "proxy not usable".
        print(f"Proxy test failed: {e}")
        return False
    finally:
        # Quit here so the browser is closed even when get() raises;
        # otherwise every failed probe leaks a Chrome process.
        if driver is not None:
            driver.quit()
# Test before using: report whether the proxy passed the connectivity probe.
proxy_ok = test_proxy_connection("http://proxy-server:8080")
print("Proxy is working" if proxy_ok else "Proxy connection failed")
2. Proxy Pool Management
import time
from selenium.common.exceptions import WebDriverException
class ProxyPool:
    """Round-robin pool of proxies that skips entries marked as failed."""

    def __init__(self, proxy_list):
        """Store the candidate proxies ("host:port" strings)."""
        self.proxy_list = proxy_list
        # Running counter used to pick the next proxy in round-robin order.
        self.current_proxy = 0
        self.failed_proxies = set()

    def get_next_proxy(self):
        """Return the next proxy that is not marked as failed.

        When every proxy has been marked as failed, the failure set is
        cleared and all proxies become eligible again.

        Raises:
            ValueError: If the pool was created with no proxies.
        """
        if not self.proxy_list:
            raise ValueError("ProxyPool has no proxies to choose from")
        available_proxies = [p for p in self.proxy_list if p not in self.failed_proxies]
        if not available_proxies:
            # Everything has failed: forget the failures and start over.
            self.failed_proxies.clear()
            available_proxies = self.proxy_list
        proxy = available_proxies[self.current_proxy % len(available_proxies)]
        self.current_proxy += 1
        return proxy

    def mark_proxy_failed(self, proxy):
        """Exclude ``proxy`` from selection until the failure set is reset."""
        self.failed_proxies.add(proxy)

    def create_driver(self, max_attempts=None):
        """Return a Chrome driver that loaded a test page through a pool proxy.

        Args:
            max_attempts: Maximum number of proxies to try. ``None`` (the
                default) retries indefinitely.

        Raises:
            RuntimeError: If ``max_attempts`` proxies were tried without success.
            ValueError: If the pool has no proxies.
        """
        attempts = 0
        last_error = None
        while max_attempts is None or attempts < max_attempts:
            attempts += 1
            proxy = self.get_next_proxy()
            # Bind before the try block: webdriver.Chrome() itself may raise,
            # and the handler below must not hit an unbound name.
            driver = None
            try:
                chrome_options = Options()
                chrome_options.add_argument(f"--proxy-server=http://{proxy}")
                driver = webdriver.Chrome(options=chrome_options)
                # Test the proxy
                driver.set_page_load_timeout(10)
                driver.get("https://httpbin.org/ip")
                return driver
            except WebDriverException as e:
                last_error = e
                self.mark_proxy_failed(proxy)
                if driver is not None:
                    driver.quit()
        raise RuntimeError(
            f"No working proxy found after {attempts} attempt(s)"
        ) from last_error
3. Handling Proxy Authentication
For proxies that require authentication, you have several options (the approach is similar to handling authentication in Puppeteer):
# Option 1: Use proxy URL with credentials
# NOTE(review): Chrome is widely reported to ignore credentials embedded in
# the --proxy-server value and to show an auth prompt instead - verify this
# works with your browser before relying on it.
proxy_url = "http://username:password@proxy-server:8080"
chrome_options = Options()
chrome_options.add_argument(f"--proxy-server={proxy_url}")
# Option 2: Use Chrome extension (shown in advanced section above)
# Option 3: Use system proxy settings
# These variables are set in the current process environment.
# NOTE(review): whether the launched browser honours them (and any embedded
# credentials) depends on the browser and platform - confirm before use.
import os
os.environ['HTTP_PROXY'] = 'http://username:password@proxy-server:8080'
os.environ['HTTPS_PROXY'] = 'http://username:password@proxy-server:8080'
Common Proxy Issues and Solutions
1. Proxy Connection Timeout
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://proxy-server:8080")
driver = webdriver.Chrome(options=chrome_options)
# Chrome has no command-line switches for proxy timeouts, so unknown flags
# such as "--proxy-server-fallback-timeout" are silently ignored. Bound slow
# or dead proxies with WebDriver's own page-load timeout instead.
driver.set_page_load_timeout(10)  # 10 seconds
2. Bypassing Proxy for Specific Domains
# Hosts that should be reached directly instead of through the proxy.
bypass_hosts = ["localhost", "127.0.0.1", "*.internal.com"]
chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://proxy-server:8080")
chrome_options.add_argument("--proxy-bypass-list=" + ",".join(bypass_hosts))
driver = webdriver.Chrome(options=chrome_options)
3. Handling SSL Certificate Issues
# WARNING: these switches disable certificate validation. Use them only with
# proxies you control (e.g. an intercepting test proxy), never for sensitive
# traffic.
chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://proxy-server:8080")
# "--ignore-ssl-errors" is a PhantomJS option, not a Chrome switch, so it is
# omitted; "--ignore-certificate-errors" is the Chrome equivalent.
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--allow-running-insecure-content")
driver = webdriver.Chrome(options=chrome_options)
Proxy Configuration for Different Browsers
Edge WebDriver
from selenium import webdriver
from selenium.webdriver.edge.options import Options
# Edge is configured here with the same --proxy-server argument used in the
# Chrome examples.
edge_options = Options()
edge_options.add_argument("--proxy-server=http://proxy-server:8080")
driver = webdriver.Edge(options=edge_options)
Safari WebDriver
from selenium import webdriver
from selenium.webdriver.safari.options import Options
# Safari proxy configuration is typically done at system level
# or through Safari preferences, so no proxy settings are added to the
# options object here.
safari_options = Options()
driver = webdriver.Safari(options=safari_options)
Monitoring and Debugging Proxy Connections
Network Request Monitoring
As with monitoring network requests in Puppeteer, you can inspect the browser's performance logs to monitor traffic going through the proxy:
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Enable logging
# The logging preference is set on the Options object: the
# desired_capabilities keyword was removed in Selenium 4, and assigning into
# DesiredCapabilities.CHROME mutated a dictionary shared by every caller.
chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://proxy-server:8080")
chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
driver = webdriver.Chrome(options=chrome_options)
# Get network logs
logs = driver.get_log("performance")
for log in logs:
    print(log)
IP Verification
def verify_proxy_ip(driver, expected_proxy_ip=None):
    """Return the public IP reported by https://httpbin.org/ip for this session.

    Args:
        driver: WebDriver instance whose outgoing IP should be checked.
        expected_proxy_ip: Optional IP the proxy is expected to present. A
            warning is printed when it is not among the reported addresses.

    Returns:
        The "origin" value from the response, as a string.

    Raises:
        ValueError: If the page body is not the expected JSON document.
    """
    import json  # local import keeps this snippet self-contained

    driver.get("https://httpbin.org/ip")
    body = driver.find_element("tag name", "pre").text
    # Parse the JSON properly instead of splitting on quote characters, which
    # returns the wrong field if the key order or layout ever changes.
    try:
        current_ip = json.loads(body)["origin"]
    except (ValueError, KeyError, TypeError) as exc:
        raise ValueError(f"Unexpected response from httpbin.org/ip: {body!r}") from exc
    if expected_proxy_ip:
        # "origin" may list several comma-separated addresses.
        reported_ips = [part.strip() for part in str(current_ip).split(",")]
        if expected_proxy_ip not in reported_ips:
            print(f"Warning: Expected {expected_proxy_ip}, got {current_ip}")
    return current_ip
Conclusion
Configuring proxy settings in Selenium WebDriver is crucial for effective web scraping and automation. Whether you're using HTTP, HTTPS, or SOCKS proxies, the key is to properly configure the browser options and handle authentication when required. Remember to test your proxy connections, implement proper error handling, and consider using proxy pools for better reliability and performance.
For more advanced web scraping scenarios, consider using dedicated web scraping APIs that handle proxy rotation and management automatically, providing more reliable and scalable solutions for your data extraction needs.