Running headless Chromium can be resource-intensive, especially with multiple concurrent instances or on machines with limited resources. Here are proven strategies to significantly reduce CPU and memory usage:
Core Launch Flags
The most impactful optimization is using the right Chrome launch arguments:
Python (Selenium):
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--headless=new') # Use new headless mode (Chrome 109+)
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-gpu')
options.add_argument('--disable-extensions')
options.add_argument('--disable-plugins')
options.add_argument('--blink-settings=imagesEnabled=false') # Skip image loading (Chrome has no '--disable-images' switch)
# JavaScript has no launch flag; if not needed, disable it via prefs: 'profile.managed_default_content_settings.javascript': 2
options.add_argument('--disable-web-security') # Not a performance flag; only use if you must bypass CORS
options.add_argument('--disable-features=TranslateUI')
options.add_argument('--disable-ipc-flooding-protection')
options.add_argument('--memory-pressure-off')
# Set window size to reduce rendering overhead
options.add_argument('--window-size=800,600')
driver = webdriver.Chrome(options=options)
Node.js (Puppeteer):
const puppeteer = require('puppeteer');
const browser = await puppeteer.launch({
  headless: 'new',
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--no-first-run',
    '--no-zygote',
    '--single-process', // use with caution; see the single-process note below
    '--disable-gpu',
    '--disable-background-timer-throttling',
    '--disable-backgrounding-occluded-windows',
    '--disable-renderer-backgrounding',
    '--memory-pressure-off'
  ]
});
Memory Management
Set Memory Limits
Limit Chrome's memory usage with system-level controls:
# Using systemd-run (Linux)
systemd-run --scope -p MemoryMax=512M your-chrome-command # MemoryLimit= is the older cgroup-v1 name
# Using Docker
docker run --memory=512m your-chrome-image
# Using ulimit
ulimit -v 524288 # 512MB in KB
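The same idea can be applied from Python by launching the scraping script in a child process with an address-space limit. This is a minimal Linux-only sketch using the standard resource module; the 512 MB cap and the scrape.py name are placeholder assumptions:

import resource
import subprocess

def run_with_memory_limit(cmd, limit_mb=512):
    # Apply RLIMIT_AS in the child before it executes (Linux only)
    limit_bytes = limit_mb * 1024 * 1024
    def apply_limit():
        resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
    return subprocess.run(cmd, preexec_fn=apply_limit)

run_with_memory_limit(['python', 'scrape.py'], limit_mb=512)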
Configure Page Cache
# Disable notifications, popups, and images via Chrome prefs
prefs = {
    'profile.default_content_setting_values.notifications': 2,
    'profile.default_content_settings.popups': 0,
    'profile.managed_default_content_settings.images': 2,
}
options.add_experimental_option('prefs', prefs)
# Cache sizes are command-line switches, not prefs
options.add_argument('--disk-cache-size=0')
options.add_argument('--media-cache-size=0')
Resource Blocking
Block unnecessary content to reduce CPU and bandwidth usage:
Python (Selenium with Chrome DevTools Protocol):
def block_resources(driver):
    # Enable the Network domain, then block matching URLs outright
    driver.execute_cdp_cmd('Network.enable', {})
    driver.execute_cdp_cmd('Network.setBlockedURLs', {
        'urls': ['*.css', '*.jpg', '*.jpeg', '*.png', '*.gif', '*.woff*']
    })

# Use after creating driver
block_resources(driver)
Puppeteer Resource Blocking:
const page = await browser.newPage();
// Block images, CSS, fonts
await page.setRequestInterception(true);
page.on('request', (req) => {
  const resourceType = req.resourceType();
  if (['image', 'stylesheet', 'font'].includes(resourceType)) {
    req.abort();
  } else {
    req.continue();
  }
});
Connection Pooling & Reuse
Instead of creating new browser instances, reuse connections:
Python Connection Pool:
class ChromiumPool:
    def __init__(self, pool_size=3):
        self.pool = []
        self.pool_size = pool_size
        self._initialize_pool()

    def _initialize_pool(self):
        for _ in range(self.pool_size):
            options = webdriver.ChromeOptions()
            # Add your optimized arguments here
            driver = webdriver.Chrome(options=options)
            self.pool.append(driver)

    def get_driver(self):
        return self.pool.pop() if self.pool else None

    def return_driver(self, driver):
        # Clear cookies/cache before returning
        driver.delete_all_cookies()
        self.pool.append(driver)
# Usage
pool = ChromiumPool(pool_size=5)
driver = pool.get_driver()
# Use driver...
pool.return_driver(driver)
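To guarantee a driver is always returned to the pool, even when a scrape raises, a small wrapper around the class above can help. This is an illustrative sketch; pooled_driver is not part of Selenium or any library:

from contextlib import contextmanager

@contextmanager
def pooled_driver(pool):
    # Check a driver out and always return it, even on errors
    driver = pool.get_driver()
    if driver is None:
        raise RuntimeError('pool exhausted')
    try:
        yield driver
    finally:
        pool.return_driver(driver)

# Usage
with pooled_driver(pool) as driver:
    driver.get('https://example.com')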
Monitoring & Limits
Process Monitoring
import psutil
import os
def monitor_chrome_usage():
    chrome_processes = []
    for proc in psutil.process_iter(['pid', 'name', 'memory_info', 'cpu_percent']):
        if 'chrome' in (proc.info['name'] or '').lower():
            chrome_processes.append(proc)
    total_memory = sum(p.info['memory_info'].rss for p in chrome_processes)
    total_cpu = sum(p.info['cpu_percent'] for p in chrome_processes)
    print(f"Chrome Memory: {total_memory / 1024 / 1024:.2f} MB")
    print(f"Chrome CPU: {total_cpu:.2f}%")
    return total_memory, total_cpu

# Kill high-memory processes
def kill_high_memory_chrome(max_memory_mb=500):
    for proc in psutil.process_iter(['pid', 'name', 'memory_info']):
        if 'chrome' in (proc.info['name'] or '').lower():
            memory_mb = proc.info['memory_info'].rss / 1024 / 1024
            if memory_mb > max_memory_mb:
                proc.kill()
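These two helpers can be combined into a simple watchdog loop that reaps Chrome when total memory climbs too high. The interval and thresholds below are arbitrary example values:

import time

def chrome_watchdog(max_total_mb=1500, interval_s=30):
    # Poll overall usage and kill oversized Chrome processes
    while True:
        total_memory, _ = monitor_chrome_usage()
        if total_memory / 1024 / 1024 > max_total_mb:
            kill_high_memory_chrome(max_memory_mb=500)
        time.sleep(interval_s)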
Timeout Management
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

# Set aggressive timeouts
driver.set_page_load_timeout(10)  # 10 seconds max
driver.implicitly_wait(5)

# Use explicit waits for specific elements only
try:
    element = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.ID, "target-element"))
    )
except TimeoutException:
    # Handle timeout gracefully
    pass
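When processing many URLs, it also helps to give each page load a hard deadline and move on rather than letting one slow site stall the run. A minimal sketch (load_with_timeout is an illustrative helper, not a Selenium API):

def load_with_timeout(driver, url, timeout_s=10):
    # Skip pages that do not finish loading within the deadline
    driver.set_page_load_timeout(timeout_s)
    try:
        driver.get(url)
        return True
    except TimeoutException:
        return False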
Alternative Lightweight Options
For simple HTML parsing, or when JavaScript rendering is only occasionally needed:
1. requests-html (Python):
from requests_html import HTMLSession

session = HTMLSession()
r = session.get('https://example.com')
# The bundled Chromium is only launched when render() is first called
r.html.render(timeout=10, keep_page=True)
print(r.html.text)
2. Chrome in Single Process Mode:
options.add_argument('--single-process') # Use with caution
options.add_argument('--no-zygote')
Environment-Specific Optimizations
Docker Container
# Use minimal base image
FROM zenika/alpine-chrome:latest
# Flags for your launcher script to pass to Chrome (note: --max_old_space_size is a Node.js/V8 flag, not a Chrome switch)
ENV CHROME_ARGS="--memory-pressure-off --disable-dev-shm-usage"
# Run as an unprivileged user
RUN addgroup -g 1001 -S nodejs && adduser -S nodejs -u 1001 -G nodejs
USER nodejs
CI/CD Environment
# GitHub Actions example
- name: Run headless Chrome tests
  run: |
    export CHROME_FLAGS="--headless=new --no-sandbox --disable-gpu --disable-dev-shm-usage"
    npm test
  env:
    NODE_OPTIONS: "--max-old-space-size=512"
Performance Best Practices
- Batch Operations: Process multiple URLs in the same browser session (see the sketch after this list)
- Page Lifecycle: Use page.goto() with waitUntil: 'domcontentloaded' instead of networkidle
- Tab Management: Close tabs immediately after use with page.close()
- Clean Up: Always call browser.close() in finally blocks
- Resource Monitoring: Implement memory/CPU monitoring and automatic restart
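As a concrete illustration of batching and cleanup, the sketch below reuses one Selenium session for many URLs and quits exactly once in a finally block; scrape_batch is a hypothetical helper, and reading driver.title stands in for whatever data you actually collect:

def scrape_batch(driver, urls):
    # One browser session for the whole batch, closed exactly once
    results = {}
    try:
        for url in urls:
            driver.get(url)
            results[url] = driver.title
    finally:
        driver.quit()
    return results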
These optimizations can reduce headless Chromium's resource usage by 50-80% while maintaining functionality for most web scraping tasks.