Mimicking human behavior in JavaScript web scraping helps you avoid detection by anti-bot systems that flag obviously automated traffic. It does not by itself make a scraper ethical or compliant; respecting robots.txt, terms of service, and rate limits is covered in the best practices at the end of this article. Modern websites employ sophisticated detection methods, including behavioral analysis, browser fingerprinting, and rate limiting. Here are proven techniques for making scraping bots appear more human-like using tools such as Puppeteer and Playwright:
1. Natural Mouse Movements and Clicks
Human mouse movements are never perfectly straight or predictable. Implement curved mouse paths and randomized click positions to avoid detection algorithms that track mouse behavior patterns.
const puppeteer = require('puppeteer');
// Note: recent Puppeteer releases removed page.waitForTimeout; on those versions,
// swap the waitForTimeout calls in these examples for: new Promise(r => setTimeout(r, ms))
// Simulate a curved-looking mouse movement (linear interpolation plus random jitter;
// a true Bezier curve would be smoother still)
async function moveMouseLikeHuman(page, startX, startY, endX, endY) {
const steps = Math.floor(Math.random() * 20) + 10; // 10-29 steps
for (let i = 0; i <= steps; i++) {
const t = i / steps;
// Add some randomness to create a more natural curve
const noise = (Math.random() - 0.5) * 5;
const x = startX + (endX - startX) * t + noise;
const y = startY + (endY - startY) * t + noise;
await page.mouse.move(x, y);
await page.waitForTimeout(Math.random() * 5 + 1); // Small delays between movements
}
}
async function simulateHumanClick(page, selector) {
const rect = await page.evaluate(selector => {
const element = document.querySelector(selector);
if (!element) return null;
const { top, left, bottom, right } = element.getBoundingClientRect();
return { top, left, bottom, right };
}, selector);
if (!rect) throw new Error(`Element not found: ${selector}`);
// Random position within element bounds (avoiding edges)
const padding = 5;
const clickPosition = {
x: rect.left + padding + Math.random() * (rect.right - rect.left - padding * 2),
y: rect.top + padding + Math.random() * (rect.bottom - rect.top - padding * 2)
};
// Get current mouse position and move naturally
const currentPos = await page.evaluate(() => ({
x: window.mouseX || 0,
y: window.mouseY || 0
}));
await moveMouseLikeHuman(page, currentPos.x, currentPos.y, clickPosition.x, clickPosition.y);
// Add slight delay before clicking (human reaction time)
await page.waitForTimeout(Math.random() * 100 + 50);
await page.mouse.click(clickPosition.x, clickPosition.y);
}
// Example usage with Playwright
const { chromium } = require('playwright');
async function playwrightExample() {
const browser = await chromium.launch();
const page = await browser.newPage();
// Track mouse position for natural movements
await page.addInitScript(() => {
document.addEventListener('mousemove', (e) => {
window.mouseX = e.clientX;
window.mouseY = e.clientY;
});
});
await page.goto('https://example.com');
await simulateHumanClick(page, 'button#submit');
await browser.close();
}
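The same helpers work with Puppeteer. Here is a minimal sketch; the URL and button#submit selector are placeholders, and it assumes a Puppeteer release that still provides page.waitForTimeout, using evaluateOnNewDocument in place of Playwright's addInitScript:
async function puppeteerExample() {
  const browser = await puppeteer.launch({ headless: 'new' });
  const page = await browser.newPage();
  // Track the mouse position so simulateHumanClick can start from the last known point
  await page.evaluateOnNewDocument(() => {
    document.addEventListener('mousemove', (e) => {
      window.mouseX = e.clientX;
      window.mouseY = e.clientY;
    });
  });
  await page.goto('https://example.com');
  await simulateHumanClick(page, 'button#submit');
  await browser.close();
}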
2. Realistic Typing Patterns and Delays
Human typing has natural variations in speed, includes occasional typos, and has pauses for thinking. Avoid setting input values directly as this can be easily detected.
async function humanTypeAdvanced(page, selector, text, options = {}) {
const {
minDelay = 50,
maxDelay = 200,
typoChance = 0.05, // 5% chance of typo
thinkingPause = 0.1 // 10% chance of longer pause
} = options;
await page.click(selector);
await page.waitForTimeout(Math.random() * 300 + 100); // Initial focus delay
for (let i = 0; i < text.length; i++) {
const char = text[i];
// Simulate occasional typos
if (Math.random() < typoChance && i > 0) {
const wrongChar = String.fromCharCode(char.charCodeAt(0) + (Math.random() > 0.5 ? 1 : -1));
await page.keyboard.type(wrongChar);
await page.waitForTimeout(Math.random() * 200 + 100);
await page.keyboard.press('Backspace');
await page.waitForTimeout(Math.random() * 100 + 50);
}
await page.keyboard.type(char);
// Variable typing speed
let delay = Math.random() * (maxDelay - minDelay) + minDelay;
// Occasional thinking pauses (longer delays)
if (Math.random() < thinkingPause) {
delay += Math.random() * 1000 + 500;
}
// Slower typing after spaces (word boundaries)
if (char === ' ') {
delay += Math.random() * 100 + 50;
}
await page.waitForTimeout(delay);
}
}
// More natural form filling
async function fillFormNaturally(page, formData) {
for (const [selector, value] of Object.entries(formData)) {
await page.focus(selector);
await page.waitForTimeout(Math.random() * 500 + 200);
// Clear existing text naturally (use 'Meta' instead of 'Control' on macOS)
await page.keyboard.down('Control');
await page.keyboard.press('KeyA');
await page.keyboard.up('Control');
await page.waitForTimeout(50);
await humanTypeAdvanced(page, selector, value);
// Tab to next field or random mouse movement
if (Math.random() > 0.5) {
await page.keyboard.press('Tab');
} else {
// Move mouse randomly before next action
const viewport = page.viewport() || { width: 1280, height: 720 }; // viewport() can be null when defaultViewport is disabled
await page.mouse.move(
Math.random() * viewport.width,
Math.random() * viewport.height
);
}
await page.waitForTimeout(Math.random() * 1000 + 500);
}
}
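As a usage sketch, here is how you might fill a hypothetical signup form with these helpers; the selectors and values are placeholders rather than a real site's fields:
async function fillSignupForm(page) {
  // Selectors and values are placeholders for illustration
  await fillFormNaturally(page, {
    '#username': 'jane.doe42',
    '#email': 'jane.doe@example.com',
    '#bio': 'Coffee enthusiast and amateur photographer.'
  });
  // Pause briefly before submitting, as a person would
  await page.waitForTimeout(Math.random() * 1500 + 500);
  await simulateHumanClick(page, 'button[type="submit"]');
}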
3. Behavioral Timing Patterns
Implement realistic timing patterns that mirror human browsing behavior, including reading time, scrolling patterns, and interaction delays.
// Calculate reading time based on content length
function calculateReadingTime(textLength) {
const wordsPerMinute = 200 + Math.random() * 100; // 200-300 WPM
const words = textLength / 5; // Assume ~5 characters per word
const minutes = words / wordsPerMinute;
return Math.max(minutes * 60 * 1000, 2000); // Minimum 2 seconds
}
async function simulateReading(page, selector) {
const textContent = await page.$eval(selector, el => el.textContent || '');
const readingTime = calculateReadingTime(textContent.length);
// Simulate scrolling while reading
const scrollSteps = Math.floor(readingTime / 2000); // Scroll every 2 seconds
for (let i = 0; i < scrollSteps; i++) {
await page.mouse.wheel({ deltaY: Math.random() * 200 + 100 }); // Puppeteer API; Playwright uses mouse.wheel(deltaX, deltaY)
await page.waitForTimeout(Math.random() * 3000 + 1000);
}
await page.waitForTimeout(readingTime * (0.7 + Math.random() * 0.6)); // ±30% variation
}
// Simulate human-like page interaction patterns
async function browseNaturally(page, url) {
await page.goto(url, { waitUntil: 'networkidle2' });
// Initial page load pause (users orient themselves)
await page.waitForTimeout(Math.random() * 2000 + 1000);
// Simulate scrolling to get page overview
const scrollDistance = await page.evaluate(() => document.body.scrollHeight);
const scrollSteps = 3 + Math.floor(Math.random() * 5);
for (let i = 0; i < scrollSteps; i++) {
const scrollAmount = (scrollDistance / scrollSteps) + (Math.random() - 0.5) * 200;
await page.mouse.wheel({ deltaY: scrollAmount }); // Puppeteer API; Playwright uses mouse.wheel(deltaX, deltaY)
await page.waitForTimeout(Math.random() * 1500 + 500);
}
// Scroll back to top for interaction
await page.evaluate(() => window.scrollTo(0, 0));
await page.waitForTimeout(Math.random() * 1000 + 500);
}
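A short sketch tying the two together for a Puppeteer page; the URL and the article selector are assumptions about the target page:
async function readArticle(page) {
  // URL and selector are placeholders for the page you are targeting
  await browseNaturally(page, 'https://example.com/blog/some-article');
  await simulateReading(page, 'article');
  // Linger briefly before moving on, as a person would
  await page.waitForTimeout(Math.random() * 2000 + 1000);
}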
4. Browser Fingerprinting and User Agent Management
Modern anti-bot systems analyze browser fingerprints including user agent, screen resolution, timezone, language, and WebGL renderer. Create consistent, realistic browser profiles.
// Realistic user agent rotation with consistent profiles
const browserProfiles = [
{
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport: { width: 1920, height: 1080 },
platform: 'Win32',
language: 'en-US',
timezone: 'America/New_York'
},
{
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport: { width: 1440, height: 900 },
platform: 'MacIntel',
language: 'en-US',
timezone: 'America/Los_Angeles'
},
{
userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport: { width: 1366, height: 768 },
platform: 'Linux x86_64',
language: 'en-US',
timezone: 'America/Chicago'
}
];
async function setupBrowserProfile(page) {
const profile = browserProfiles[Math.floor(Math.random() * browserProfiles.length)];
// Set user agent
await page.setUserAgent(profile.userAgent);
// Set viewport (Puppeteer; Playwright sets this per context or via page.setViewportSize)
await page.setViewport(profile.viewport);
// Override browser properties to match the profile
// (with Playwright, use page.addInitScript instead of evaluateOnNewDocument)
await page.evaluateOnNewDocument((profileData) => {
Object.defineProperty(navigator, 'platform', {
get: () => profileData.platform
});
Object.defineProperty(navigator, 'language', {
get: () => profileData.language
});
Object.defineProperty(navigator, 'languages', {
get: () => [profileData.language, 'en']
});
// Override the default timezone (merged into options so an explicit timeZone still wins)
const OriginalDateTimeFormat = Intl.DateTimeFormat;
Intl.DateTimeFormat = function (locales, options = {}) {
return new OriginalDateTimeFormat(locales, { timeZone: profileData.timezone, ...options });
};
Intl.DateTimeFormat.prototype = OriginalDateTimeFormat.prototype;
// A more complete spoof would also patch Date.prototype.getTimezoneOffset
// Spoof WebGL renderer and vendor
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function(parameter) {
if (parameter === 37445) return 'Intel Inc.'; // UNMASKED_VENDOR_WEBGL
if (parameter === 37446) return 'Intel(R) Iris(TM) Graphics 6100'; // UNMASKED_RENDERER_WEBGL
return getParameter.apply(this, arguments);
};
}, profile);
}
// Advanced fingerprint evasion
async function evadeFingerprinting(page) {
await page.evaluateOnNewDocument(() => { // Playwright equivalent: page.addInitScript
// Remove automation indicators (a plain `delete navigator.webdriver` is a no-op
// because the getter lives on Navigator.prototype)
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
// Override plugin detection
Object.defineProperty(navigator, 'plugins', {
get: () => [
{
name: 'Chrome PDF Plugin',
filename: 'internal-pdf-viewer',
description: 'Portable Document Format',
length: 1
}
]
});
// Spoof canvas fingerprinting (keep the noise stable so repeated reads in one session match)
const canvasNoise = Math.random().toString(36).slice(2, 3);
const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
HTMLCanvasElement.prototype.toDataURL = function(...args) {
const result = originalToDataURL.apply(this, args);
// Inject the noise character after the data-URL header region
return result.replace(/(.{50})/, '$1' + canvasNoise);
};
// Spoof AudioContext fingerprinting
const AudioContext = window.AudioContext || window.webkitAudioContext;
if (AudioContext) {
const originalCreateOscillator = AudioContext.prototype.createOscillator;
AudioContext.prototype.createOscillator = function() {
const oscillator = originalCreateOscillator.apply(this, arguments);
const originalStart = oscillator.start;
oscillator.start = function(when) {
// Add tiny random delay to audio fingerprinting
return originalStart.call(this, when ? when + Math.random() * 0.0001 : when);
};
return oscillator;
};
}
});
}
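Both helpers are meant to run before the first navigation. A minimal sketch with a Puppeteer browser:
async function newDisguisedPage(browser) {
  const page = await browser.newPage();
  // Apply the profile and fingerprint overrides before navigating anywhere
  await setupBrowserProfile(page);
  await evadeFingerprinting(page);
  return page;
}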
5. Advanced Browser Configuration
Configure browsers to avoid common headless detection methods and appear as genuine user sessions.
async function createStealthBrowser() {
const browser = await puppeteer.launch({
headless: 'new', // Use new headless mode (less detectable than old headless)
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-blink-features=AutomationControlled',
'--disable-features=VizDisplayCompositor',
'--disable-web-security',
'--disable-features=WebRtcHideLocalIpsWithMdns',
'--window-size=1920,1080',
'--start-maximized'
],
defaultViewport: null,
ignoreDefaultArgs: ['--enable-automation'],
env: {
...process.env,
DISPLAY: process.env.DISPLAY || ':99' // For Linux environments
}
});
const page = await browser.newPage();
// Remove automation indicators
await page.evaluateOnNewDocument(() => {
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined,
});
// Mock chrome runtime
window.chrome = {
runtime: {}
};
// Mock notification permissions
Object.defineProperty(Notification, 'permission', {
get: () => 'default'
});
});
return { browser, page };
}
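Usage is then a matter of combining the stealth launch with the earlier profile setup; a short sketch with a placeholder URL:
async function stealthExample() {
  const { browser, page } = await createStealthBrowser();
  try {
    await setupBrowserProfile(page); // from section 4
    await browseNaturally(page, 'https://example.com'); // placeholder URL
  } finally {
    await browser.close();
  }
}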
6. Proxy Rotation and Session Management
Implement intelligent proxy rotation with session persistence to mimic real user geographic distribution and maintain cookies.
const proxyRotator = {
proxies: [
'http://user:pass@proxy1.example.com:8080',
'http://user:pass@proxy2.example.com:8080',
'http://user:pass@proxy3.example.com:8080'
],
currentIndex: 0,
getNextProxy() {
const proxy = this.proxies[this.currentIndex];
this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
return proxy;
},
async createProxiedBrowser() {
const proxy = this.getNextProxy();
const browser = await puppeteer.launch({
// Note: Chromium ignores credentials embedded in --proxy-server;
// call page.authenticate({ username, password }) on each page instead
args: [`--proxy-server=${proxy}`],
headless: 'new'
});
return browser;
}
};
// Session-aware browsing with cookie persistence
class BrowsingSession {
constructor(sessionId) {
this.sessionId = sessionId;
this.cookies = [];
this.userAgent = null;
this.proxy = null;
}
async initializeBrowser() {
this.proxy = proxyRotator.getNextProxy();
this.browser = await puppeteer.launch({
args: [`--proxy-server=${this.proxy}`],
headless: 'new'
});
this.page = await this.browser.newPage();
await setupBrowserProfile(this.page);
// Restore previous session cookies
if (this.cookies.length > 0) {
await this.page.setCookie(...this.cookies);
}
}
async saveCookies() {
this.cookies = await this.page.cookies();
}
async navigate(url, options = {}) {
await browseNaturally(this.page, url);
await this.saveCookies();
if (options.simulateReading) {
await simulateReading(this.page, 'body');
}
}
}
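If you want sessions to survive process restarts, the cookies can also be persisted to disk between runs. A minimal sketch, assuming a local ./sessions directory for storage (call loadCookies() before initializeBrowser() so the restored cookies get applied):
const fs = require('fs');
const path = require('path');

class PersistentBrowsingSession extends BrowsingSession {
  get cookieFile() {
    // One JSON file per session id; the ./sessions directory is illustrative
    return path.join('./sessions', `${this.sessionId}.json`);
  }
  async loadCookies() {
    if (fs.existsSync(this.cookieFile)) {
      this.cookies = JSON.parse(fs.readFileSync(this.cookieFile, 'utf8'));
    }
  }
  async saveCookies() {
    await super.saveCookies();
    fs.mkdirSync(path.dirname(this.cookieFile), { recursive: true });
    fs.writeFileSync(this.cookieFile, JSON.stringify(this.cookies, null, 2));
  }
}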
7. Request Pattern Mimicking
Implement realistic request patterns that mirror human browsing, including resource loading, prefetching, and background requests.
// Simulate realistic resource loading patterns
async function simulateResourceLoading(page) {
// Trigger a few requests that real browsers typically make on an already-loaded page
// (page.evaluate runs after navigation; an init script would only affect future navigations)
await page.evaluate(() => {
// Request the favicon if the page does not already declare one
if (!document.querySelector('link[rel~="icon"]')) {
const favicon = document.createElement('link');
favicon.rel = 'icon';
favicon.href = '/favicon.ico';
document.head.appendChild(favicon);
}
// Simulate analytics/tracking pixels (but don't actually load them)
const img = new Image();
img.src = 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7';
// Simulate WebSocket connections that some sites expect
if (window.WebSocket) {
setTimeout(() => {
try {
new WebSocket('wss://echo.websocket.org/');
} catch (e) {
// Ignore errors - the connection attempt itself is what mimics real background traffic
}
}, Math.random() * 5000 + 2000);
}
});
}
// Rate limiting with human-like jitter between requests
class RateLimiter {
constructor(requestsPerMinute = 30) {
this.requests = [];
this.maxRequests = requestsPerMinute;
}
async waitForRequest() {
const now = Date.now();
const oneMinuteAgo = now - 60000;
// Remove old requests
this.requests = this.requests.filter(time => time > oneMinuteAgo);
if (this.requests.length >= this.maxRequests) {
const oldestRequest = Math.min(...this.requests);
const waitTime = oldestRequest + 60000 - now;
await new Promise(resolve => setTimeout(resolve, waitTime));
return this.waitForRequest();
}
// Add human-like randomness to request timing
const randomDelay = Math.random() * 3000 + 1000; // 1-4 seconds
await new Promise(resolve => setTimeout(resolve, randomDelay));
this.requests.push(Date.now());
}
}
// Complete human-like scraping session
async function scrapeWithHumanBehavior(urls) {
const session = new BrowsingSession('session_' + Date.now());
const rateLimiter = new RateLimiter(20); // 20 requests per minute
await session.initializeBrowser();
try {
for (const url of urls) {
await rateLimiter.waitForRequest();
console.log(`Navigating to: ${url}`);
await session.navigate(url, { simulateReading: true });
// Simulate human-like interactions
await simulateResourceLoading(session.page);
// Random chance of additional interactions
if (Math.random() > 0.7) {
// Simulate scrolling or clicking
await simulateReading(session.page, 'body');
}
// Extract data here...
const title = await session.page.title();
console.log(`Page title: ${title}`);
}
} finally {
await session.browser.close();
}
}
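A run is then kicked off like this (the URLs are placeholders):
// Example invocation
scrapeWithHumanBehavior([
  'https://example.com/products?page=1',
  'https://example.com/products?page=2'
]).catch(err => console.error('Scraping session failed:', err));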
8. Detection Evasion and Monitoring
Implement detection monitoring and adaptive behavior modification to respond to anti-bot countermeasures.
// Monitor for bot detection indicators
async function checkForBotDetection(page) {
const indicators = await page.evaluate(() => {
const checks = {
captcha: document.querySelector('[class*="captcha"], [id*="captcha"]') !== null,
blocked: document.body.textContent.toLowerCase().includes('blocked') ||
document.body.textContent.toLowerCase().includes('forbidden'),
rateLimited: document.body.textContent.toLowerCase().includes('rate limit'),
jsChallenge: document.title.toLowerCase().includes('checking') ||
document.body.textContent.toLowerCase().includes('checking your browser')
};
return checks;
});
return indicators;
}
// Adaptive behavior modification
async function adaptiveScraping(page, url, retriesLeft = 3) {
await page.goto(url, { waitUntil: 'networkidle2' });
const detection = await checkForBotDetection(page);
if (detection.captcha) {
console.log('CAPTCHA detected - manual intervention required');
throw new Error('CAPTCHA_DETECTED');
}
if (detection.blocked || detection.rateLimited) {
if (retriesLeft <= 0) throw new Error('BLOCKED_AFTER_RETRIES');
console.log('Rate limiting detected - backing off before retrying');
await page.waitForTimeout(Math.random() * 10000 + 10000); // 10-20 second delay
return adaptiveScraping(page, url, retriesLeft - 1); // Retry a bounded number of times
}
if (detection.jsChallenge) {
console.log('JavaScript challenge detected - waiting longer');
await page.waitForTimeout(5000);
await page.reload({ waitUntil: 'networkidle2' });
}
return page;
}
Best Practices and Ethical Considerations
When implementing human-like behavior in web scraping, always prioritize ethical and legal compliance:
1. Respect robots.txt and Terms of Service
Always check and comply with the website's robots.txt file and terms of service before scraping.
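As a starting point, a simplified pre-flight check might look like the sketch below; it only honors Disallow rules under User-agent: * and relies on Node 18+'s global fetch, so use a full robots.txt parser library for production work:
async function isAllowedByRobots(targetUrl) {
  const { origin, pathname } = new URL(targetUrl);
  const response = await fetch(`${origin}/robots.txt`); // Node 18+ global fetch
  if (!response.ok) return true; // No robots.txt found; still apply your own rate limits
  const lines = (await response.text()).split('\n').map(line => line.trim());
  let appliesToUs = false;
  for (const line of lines) {
    const [field, ...rest] = line.split(':');
    const value = rest.join(':').trim();
    if (/^user-agent$/i.test(field)) appliesToUs = value === '*';
    // Block if a Disallow rule for all agents matches the start of our path
    if (appliesToUs && /^disallow$/i.test(field) && value && pathname.startsWith(value)) {
      return false;
    }
  }
  return true;
}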
2. Rate Limiting
Implement reasonable delays between requests to avoid overwhelming servers:
const delay = Math.random() * 2000 + 1000; // 1-3 second delay
await page.waitForTimeout(delay);
3. API First Approach
Always prefer official APIs over scraping when available. Scraping should be a last resort.
4. Data Minimization
Only collect the data you actually need and delete it when no longer required.
5. Monitoring and Logging
Implement proper logging to monitor your scraping behavior and ensure compliance:
const fs = require('fs');
function logRequest(url, status, timestamp) {
const logEntry = `${timestamp}: ${status} - ${url}\n`;
fs.appendFileSync('scraping.log', logEntry);
}
Remember that while these techniques can make your scrapers more human-like, they should always be used responsibly and within legal boundaries. The goal is not to circumvent legitimate security measures, but to create well-behaved automation that doesn't negatively impact web services.