How do I rotate user agents in JavaScript web scraping?
User agent rotation is a crucial technique in web scraping that helps avoid detection and blocking by websites. By switching between different user agent strings, your scraping scripts can appear as different browsers and devices, making it harder for anti-bot systems to identify and block your requests.
Understanding User Agents
A user agent string identifies the browser, operating system, and device making the request. Websites use this information to serve appropriate content and detect potential bots. Here's what a typical user agent looks like:
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
Basic User Agent Rotation with Arrays
The simplest approach to user agent rotation involves maintaining an array of user agent strings and randomly selecting one for each request.
Creating a User Agent Pool
// Pool of desktop user agent strings to rotate through.
// Every entry should be a string that a real browser actually sends: a
// malformed value is itself a signal that the client is automated.
const userAgents = [
  // Chrome 91 on Windows 10
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
  // Chrome 91 on macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
  // Firefox 89 on Windows 10
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
  // Safari 14.1 on macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
  // Edge 91 on Windows 10. Chromium-based Edge sends a complete Chrome
  // user agent followed by an "Edg/" token (not "Edge/").
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59',
  // Chrome 91 on Linux
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
];
/**
 * Pick one entry from the shared `userAgents` pool at random.
 *
 * @returns {string} A user agent string taken from `userAgents`.
 */
function getRandomUserAgent() {
  // Math.random() is in [0, 1), so the floored product is always a valid index.
  const pick = Math.floor(Math.random() * userAgents.length);
  return userAgents[pick];
}
User Agent Rotation with Puppeteer
Puppeteer is one of the most popular tools for browser automation in JavaScript. Here's how to implement user agent rotation:
Basic Puppeteer Implementation
const puppeteer = require('puppeteer');
/**
 * Hands out user agent strings from a fixed pool, either in round-robin
 * order (`getNext`) or at random (`getRandom`).
 */
class UserAgentRotator {
  /**
   * @param {Iterable<string>} [userAgents] - Optional custom pool. When
   *   omitted, a built-in list of desktop browser user agents is used.
   * @throws {Error} If the resulting pool is empty.
   */
  constructor(userAgents) {
    const defaults = [
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
      // Chromium-based Edge sends a complete Chrome user agent followed by
      // an "Edg/" token (not "Edge/").
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59'
    ];
    // Copy a caller-supplied pool so later changes to their array do not
    // affect this rotator.
    const pool = userAgents === undefined ? defaults : Array.from(userAgents);
    if (pool.length === 0) {
      // An empty pool would make getNext() compute a NaN index.
      throw new Error('UserAgentRotator requires at least one user agent');
    }
    this.userAgents = pool;
    this.currentIndex = 0;
  }

  /**
   * Return the next user agent in round-robin order, wrapping to the start
   * of the pool after the last entry.
   *
   * @returns {string}
   */
  getNext() {
    const userAgent = this.userAgents[this.currentIndex];
    this.currentIndex = (this.currentIndex + 1) % this.userAgents.length;
    return userAgent;
  }

  /**
   * Return a randomly chosen user agent. Does not advance the round-robin
   * position used by `getNext`.
   *
   * @returns {string}
   */
  getRandom() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}
/**
 * Open five pages one after another, each with a randomly chosen user agent,
 * load https://httpbin.org/user-agent and log the returned page content.
 *
 * The browser is always closed, even when a request fails.
 *
 * @returns {Promise<void>}
 */
async function scrapeWithRotation() {
  const browser = await puppeteer.launch({ headless: true });
  const rotator = new UserAgentRotator();
  try {
    for (let i = 0; i < 5; i++) {
      const page = await browser.newPage();
      try {
        const userAgent = rotator.getRandom();
        await page.setUserAgent(userAgent);
        console.log(`Using User Agent: ${userAgent}`);
        await page.goto('https://httpbin.org/user-agent', {
          waitUntil: 'networkidle2'
        });
        const content = await page.content();
        console.log(`Response ${i + 1}:`, content);
      } finally {
        // Close the page on failure too, not only on the success path.
        await page.close();
      }
      // Add delay between requests
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  } finally {
    await browser.close();
  }
}

// Handle the rejection explicitly: a bare call would leave the returned
// promise floating and turn any failure into an unhandled rejection.
scrapeWithRotation().catch((error) => {
  console.error(`Scraping failed: ${error.message}`);
});
Advanced Puppeteer with Request Interception
For more sophisticated user agent rotation, you can intercept requests and modify headers:
/**
 * Load https://httpbin.org/headers with request interception enabled,
 * replacing the `user-agent` header of every intercepted request with a
 * randomly chosen one, then log the page content.
 *
 * Note that each intercepted request gets its own random user agent, so
 * requests made by the same page may carry different values.
 *
 * @returns {Promise<void>}
 */
async function advancedUserAgentRotation() {
  const browser = await puppeteer.launch({ headless: true });
  const rotator = new UserAgentRotator();
  try {
    const page = await browser.newPage();
    // Enable request interception
    await page.setRequestInterception(true);
    page.on('request', async (request) => {
      const headers = { ...request.headers(), 'user-agent': rotator.getRandom() };
      try {
        await request.continue({ headers });
      } catch (error) {
        // continue() returns a promise; report a failure here instead of
        // letting it surface as an unhandled rejection.
        console.error(`Failed to continue request: ${error.message}`);
      }
    });
    await page.goto('https://httpbin.org/headers');
    const content = await page.content();
    console.log(content);
  } finally {
    // Release the browser even when navigation or content retrieval throws.
    await browser.close();
  }
}
User Agent Rotation with Playwright
Playwright offers similar capabilities with slight syntax differences:
const { chromium } = require('playwright');
/**
 * Create three browser contexts in turn, each with the next user agent from
 * the rotator, load https://httpbin.org/user-agent in each and log the body
 * text.
 *
 * Each context and the browser itself are closed even when a step fails.
 *
 * @returns {Promise<void>}
 */
async function playwrightUserAgentRotation() {
  const browser = await chromium.launch();
  const rotator = new UserAgentRotator();
  try {
    for (let i = 0; i < 3; i++) {
      // The user agent is fixed per context, so each iteration gets its own.
      const context = await browser.newContext({
        userAgent: rotator.getNext()
      });
      try {
        const page = await context.newPage();
        console.log(`Using User Agent: ${await page.evaluate(() => navigator.userAgent)}`);
        await page.goto('https://httpbin.org/user-agent');
        const content = await page.textContent('body');
        console.log(`Response ${i + 1}:`, content);
      } finally {
        await context.close();
      }
    }
  } finally {
    await browser.close();
  }
}
HTTP Requests with Axios
For non-browser-based scraping, you can rotate user agents with HTTP libraries like Axios:
const axios = require('axios');
/**
 * Fetches pages over plain HTTP with Axios, sending a randomly chosen user
 * agent with every request.
 */
class HttpScraper {
  constructor() {
    this.rotator = new UserAgentRotator();
  }

  /**
   * GET a single URL with a random user agent and browser-like headers.
   *
   * @param {string} url - Address to fetch.
   * @returns {Promise<*>} The `data` property of the Axios response.
   * @throws Rethrows whatever error the request raised, after logging it.
   */
  async makeRequest(url) {
    const userAgent = this.rotator.getRandom();
    const requestOptions = {
      headers: {
        'User-Agent': userAgent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive'
      },
      timeout: 10000
    };

    try {
      const reply = await axios.get(url, requestOptions);
      console.log(`Request with User Agent: ${userAgent}`);
      return reply.data;
    } catch (failure) {
      console.error(`Request failed: ${failure.message}`);
      throw failure;
    }
  }

  /**
   * Fetch several URLs one at a time, pausing one second after each.
   * A failing URL does not abort the batch; it is recorded as a failure.
   *
   * @param {Iterable<string>} urls - Addresses to fetch, in order.
   * @returns {Promise<Array<object>>} One record per URL: either
   *   `{ url, data, success: true }` or `{ url, error, success: false }`.
   */
  async scrapeMultipleUrls(urls) {
    const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    const outcomes = [];

    for (const target of urls) {
      let record;
      try {
        const payload = await this.makeRequest(target);
        record = { url: target, data: payload, success: true };
      } catch (failure) {
        record = { url: target, error: failure.message, success: false };
      }
      outcomes.push(record);
      // Space the requests out
      await pause(1000);
    }

    return outcomes;
  }
}
// Usage: fetch two httpbin endpoints in sequence and print the collected results.
const scraper = new HttpScraper();
(async () => {
  const results = await scraper.scrapeMultipleUrls([
    'https://httpbin.org/user-agent',
    'https://httpbin.org/headers'
  ]);
  console.log('Scraping results:', results);
})();
Dynamic User Agent Generation
Instead of using a static list, you can generate user agents dynamically:
const UAParser = require('ua-parser-js');
/**
 * Builds Chrome user agent strings on the fly by combining a randomly chosen
 * operating system, OS version and Chrome version.
 */
class DynamicUserAgentGenerator {
  constructor() {
    this.browsers = ['Chrome', 'Firefox', 'Safari', 'Edge'];
    this.operatingSystems = [
      // Windows 11 still reports "Windows NT 10.0" in the user agent, so
      // "10.0" is the only version listed; "Windows NT 11.0" is not a value
      // real browsers send.
      { name: 'Windows', versions: ['10.0'] },
      { name: 'macOS', versions: ['10_15_7', '11_6', '12_0'] },
      { name: 'Linux', versions: ['x86_64'] }
    ];
  }

  /**
   * Build a Chrome user agent for the given operating system.
   *
   * @param {string} osName - 'Windows', 'macOS' or 'Linux'.
   * @param {string} osVersion - Version token inserted into the platform
   *   part of the string (for Linux this is the architecture, e.g. 'x86_64').
   * @returns {string} A Chrome user agent string. An unrecognised `osName`
   *   falls back to a Windows 10 user agent.
   */
  generateChromeUA(osName, osVersion) {
    const chromeVersion = this.getRandomChromeVersion();
    const webkitVersion = '537.36';
    switch (osName) {
      case 'Windows':
        return `Mozilla/5.0 (Windows NT ${osVersion}; Win64; x64) AppleWebKit/${webkitVersion} (KHTML, like Gecko) Chrome/${chromeVersion} Safari/${webkitVersion}`;
      case 'macOS':
        return `Mozilla/5.0 (Macintosh; Intel Mac OS X ${osVersion}) AppleWebKit/${webkitVersion} (KHTML, like Gecko) Chrome/${chromeVersion} Safari/${webkitVersion}`;
      case 'Linux':
        return `Mozilla/5.0 (X11; Linux ${osVersion}) AppleWebKit/${webkitVersion} (KHTML, like Gecko) Chrome/${chromeVersion} Safari/${webkitVersion}`;
      default:
        // Unknown platform: return a valid Windows 10 user agent rather than
        // failing. The fixed arguments hit the 'Windows' case, so this
        // cannot recurse further.
        return this.generateChromeUA('Windows', '10.0');
    }
  }

  /**
   * @returns {string} One of a fixed set of Chrome version numbers, chosen
   *   at random.
   */
  getRandomChromeVersion() {
    const versions = ['91.0.4472.124', '92.0.4515.107', '93.0.4577.82', '94.0.4606.61'];
    return versions[Math.floor(Math.random() * versions.length)];
  }

  /**
   * Generate a Chrome user agent for a random operating system and version
   * taken from `this.operatingSystems`.
   *
   * @returns {string}
   */
  generate() {
    const os = this.operatingSystems[Math.floor(Math.random() * this.operatingSystems.length)];
    const osVersion = os.versions[Math.floor(Math.random() * os.versions.length)];
    return this.generateChromeUA(os.name, osVersion);
  }
}
Best Practices for User Agent Rotation
1. Maintain Realistic Patterns
/**
 * Keeps the same user agent for a run of consecutive requests (a "session")
 * before switching to a new one.
 */
class RealisticUserAgentRotator {
  constructor() {
    // User agent of the session in progress; null until the first request.
    this.sessionUserAgent = null;
    // Number of requests served by the current session so far.
    this.sessionDuration = 0;
    // Requests allowed per session before a new user agent is drawn.
    this.maxSessionDuration = 50;
  }

  /**
   * Return the user agent for the next request, starting a new session when
   * there is none yet or the current one has served its quota.
   *
   * @returns {string}
   */
  getUserAgent() {
    const sessionStillValid =
      this.sessionUserAgent && this.sessionDuration < this.maxSessionDuration;

    if (!sessionStillValid) {
      // The request that opens a session counts as its first one.
      this.sessionUserAgent = this.generateNewUserAgent();
      this.sessionDuration = 1;
    } else {
      this.sessionDuration++;
    }

    return this.sessionUserAgent;
  }

  /**
   * Draw a user agent for a new session from the shared `userAgents` pool.
   *
   * @returns {string}
   */
  generateNewUserAgent() {
    const pick = Math.floor(Math.random() * userAgents.length);
    return userAgents[pick];
  }
}
2. Combine with Other Anti-Detection Techniques
When handling browser sessions in Puppeteer, combine user agent rotation with other techniques:
/**
 * Load a page with a random user agent, a slightly randomised viewport and
 * browser-like language/encoding headers.
 *
 * @param {string} url - Address to load.
 * @param {{ getRandom: function(): string }} [rotator] - Source of user
 *   agent strings; defaults to a fresh `UserAgentRotator`.
 * @returns {Promise<string>} The page's HTML content after navigation.
 */
async function stealthScraping(url, rotator = new UserAgentRotator()) {
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu'
    ]
  });
  try {
    const page = await browser.newPage();
    // Set user agent
    await page.setUserAgent(rotator.getRandom());
    // Set viewport, varied by up to 99px so sessions do not share an exact size
    await page.setViewport({
      width: 1366 + Math.floor(Math.random() * 100),
      height: 768 + Math.floor(Math.random() * 100)
    });
    // Set additional headers
    await page.setExtraHTTPHeaders({
      'Accept-Language': 'en-US,en;q=0.9',
      'Accept-Encoding': 'gzip, deflate, br'
    });
    await page.goto(url, { waitUntil: 'networkidle2' });
    return await page.content();
  } finally {
    // Always release the browser, including when navigation fails.
    await browser.close();
  }
}
3. Error Handling and Retry Logic
/**
 * Scrape the visible text of a page, retrying with a new browser and a new
 * random user agent when an attempt fails.
 *
 * Waits 2^attempt seconds between attempts (2s, 4s, ...).
 *
 * @param {string} url - Address to scrape.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<string>} `document.body.innerText` of the loaded page.
 * @throws {Error} After `maxRetries` failed attempts; the last underlying
 *   error is attached as `cause`.
 */
async function robustScraping(url, maxRetries = 3) {
  const rotator = new UserAgentRotator();
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const browser = await puppeteer.launch({ headless: true });
      try {
        const page = await browser.newPage();
        await page.setUserAgent(rotator.getRandom());
        await page.goto(url, {
          waitUntil: 'networkidle2',
          timeout: 30000
        });
        return await page.evaluate(() => {
          return document.body.innerText;
        });
      } finally {
        // Close the browser on failure too; otherwise every failed attempt
        // leaves a browser process running.
        await browser.close();
      }
    } catch (error) {
      console.log(`Attempt ${attempt} failed: ${error.message}`);
      if (attempt === maxRetries) {
        throw new Error(`Failed after ${maxRetries} attempts`, { cause: error });
      }
      // Wait before retry with exponential backoff
      await new Promise(resolve =>
        setTimeout(resolve, Math.pow(2, attempt) * 1000)
      );
    }
  }
}
Monitoring and Analytics
Track the effectiveness of your user agent rotation:
/**
 * Collects per-user-agent request statistics so the most successful user
 * agents can be identified.
 */
class UserAgentAnalytics {
  constructor() {
    // Maps a user agent string to its statistics record.
    this.stats = new Map();
  }

  /**
   * Record the outcome of one request.
   *
   * @param {string} userAgent - User agent the request was sent with.
   * @param {boolean} success - Whether the request succeeded.
   * @param {number} responseTime - Response time of the request.
   */
  recordRequest(userAgent, success, responseTime) {
    let stat = this.stats.get(userAgent);
    if (!stat) {
      stat = {
        requests: 0,
        successes: 0,
        failures: 0,
        avgResponseTime: 0,
        successRate: 0
      };
      this.stats.set(userAgent, stat);
    }
    stat.requests++;
    if (success) {
      stat.successes++;
    } else {
      stat.failures++;
    }
    // Incremental arithmetic mean over all recorded requests. Averaging the
    // previous average with the new sample would halve the first sample and
    // over-weight recent ones.
    stat.avgResponseTime += (responseTime - stat.avgResponseTime) / stat.requests;
    stat.successRate = (stat.successes / stat.requests) * 100;
  }

  /**
   * Return the user agents with the highest success rate.
   *
   * @param {number} [limit=5] - Maximum number of entries to return.
   * @returns {Array<[string, object]>} `[userAgent, stats]` pairs sorted by
   *   descending success rate.
   */
  getBestUserAgents(limit = 5) {
    return Array.from(this.stats.entries())
      .sort((a, b) => b[1].successRate - a[1].successRate)
      .slice(0, limit);
  }
}
Advanced Techniques
User Agent Fingerprinting Evasion
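Some websites perform advanced fingerprinting beyond just checking the user agent string. You can strengthen your rotation strategy by overriding the navigator properties that commonly reveal automation, so that they stay consistent with the user agent you send: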
/**
 * Register a script that runs in every new document of `page` and replaces
 * three `navigator` properties with fixed getters.
 *
 * @param {object} page - Page object exposing `evaluateOnNewDocument`.
 * @returns {Promise<void>}
 */
async function advancedFingerprinting(page) {
  // This function is handed to the page and executed there, so it must not
  // reference anything from the surrounding scope.
  const patchNavigator = () => {
    const getters = {
      // Override navigator.webdriver
      webdriver: () => undefined,
      // Override navigator.plugins
      plugins: () => [1, 2, 3, 4, 5],
      // Override navigator.languages
      languages: () => ['en-US', 'en'],
    };
    for (const [property, getter] of Object.entries(getters)) {
      Object.defineProperty(navigator, property, { get: getter });
    }
  };

  await page.evaluateOnNewDocument(patchNavigator);
}
Real-World Implementation Example
Here's a comprehensive example that combines multiple techniques:
const puppeteer = require('puppeteer');
class AdvancedUserAgentRotator {
constructor() {
this.userAgents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15'
];
this.currentSession = null;
this.requestCount = 0;
this.maxRequestsPerSession = 20;
}
getUserAgent() {
if (!this.currentSession || this.requestCount >= this.maxRequestsPerSession) {
this.startNewSession();
}
this.requestCount++;
return this.currentSession.userAgent;
}
startNewSession() {
this.currentSession = {
userAgent: this.userAgents[Math.floor(Math.random() * this.userAgents.length)],
viewport: this.getRandomViewport()
};
this.requestCount = 0;
}
getRandomViewport() {
const viewports = [
{ width: 1920, height: 1080 },
{ width: 1366, height: 768 },
{ width: 1536, height: 864 },
{ width: 1440, height: 900 }
];
return viewports[Math.floor(Math.random() * viewports.length)];
}
getViewport() {
return this.currentSession ? this.currentSession.viewport : this.getRandomViewport();
}
}
/**
 * Visit each URL in its own page using the session user agent and viewport
 * from an `AdvancedUserAgentRotator`, and collect basic page information.
 *
 * A URL that fails is recorded as a failure and the run continues. A random
 * 1-3 second pause follows every URL. The browser is always closed.
 *
 * @param {Iterable<string>} urls - Addresses to visit, in order.
 * @returns {Promise<Array<object>>} One record per URL: either
 *   `{ url, data: { title, url, userAgent }, success: true }` or
 *   `{ url, error, success: false }`.
 */
async function enterpriseScraping(urls) {
  const rotator = new AdvancedUserAgentRotator();
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  // Load one URL in an already configured page and always close that page.
  const visit = async (page, url) => {
    try {
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: 30000
      });
      const data = await page.evaluate(() => ({
        title: document.title,
        url: window.location.href,
        userAgent: navigator.userAgent
      }));
      return { url, data, success: true };
    } catch (failure) {
      return { url, error: failure.message, success: false };
    } finally {
      await page.close();
    }
  };

  const collected = [];
  try {
    for (const target of urls) {
      const page = await browser.newPage();

      // Apply the session identity: user agent first, then viewport
      await page.setUserAgent(rotator.getUserAgent());
      await page.setViewport(rotator.getViewport());

      // Send headers resembling those of a regular browser
      await page.setExtraHTTPHeaders({
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Cache-Control': 'no-cache'
      });

      collected.push(await visit(page, target));

      // Pause for a random 1-3 seconds before the next URL
      const pauseMs = 1000 + Math.random() * 2000;
      await new Promise(resolve => setTimeout(resolve, pauseMs));
    }
  } finally {
    await browser.close();
  }
  return collected;
}
Conclusion
User agent rotation is an essential technique for successful web scraping in JavaScript. Whether you're using Puppeteer for browser automation, Playwright for cross-browser testing, or simple HTTP requests with Axios, implementing proper user agent rotation can significantly improve your scraping success rates.
Remember to combine user agent rotation with other anti-detection techniques such as proper request spacing, and to handle timeouts in Puppeteer robustly so that a slow or blocked page does not stall the whole run. Always respect robots.txt files and website terms of service, and consider using professional scraping services for large-scale operations.
The key to effective user agent rotation is maintaining realistic browsing patterns, using current and valid user agent strings, and monitoring the effectiveness of your rotation strategy through proper analytics and error handling.