How do I set custom headers in JavaScript web scraping requests?

Setting custom headers in JavaScript web scraping is essential for simulating authentic browser requests, handling authentication, and bypassing basic anti-bot measures. This guide covers multiple approaches to add custom headers to your scraping requests.

Using the fetch() API (Recommended)

The modern fetch() API provides the cleanest way to set custom headers:

Plain Object Headers

const response = await fetch('https://example.com/api/data', {
    method: 'GET',
    headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', // many browsers ignore this override; Node.js sends it (see "CORS and Forbidden Headers" below)
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://example.com',
        'Authorization': 'Bearer your-token-here',
        'X-Custom-Header': 'custom-value'
    }
});

const html = await response.text();
console.log(html);

Using Headers Constructor

const headers = new Headers();
headers.append('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
headers.append('Accept', 'application/json');
headers.append('Authorization', 'Bearer your-token-here');

try {
    const response = await fetch('https://api.example.com/data', {
        method: 'GET',
        headers: headers
    });

    if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.json();
    console.log(data);
} catch (error) {
    console.error('Fetch error:', error);
}
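
The Headers object also exposes set(), get(), has(), and delete(), which are handy for adjusting a shared header set between requests:

// set() overwrites any existing value, while append() adds another one
headers.set('Accept', 'text/html');
console.log(headers.get('Accept'));        // 'text/html'
console.log(headers.has('Authorization')); // true
headers.delete('Authorization');           // strip auth before hitting a public page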

Using XMLHttpRequest

For older environments or when you need more control over the request:

function scrapeWithXHR(url, customHeaders) {
    return new Promise((resolve, reject) => {
        const xhr = new XMLHttpRequest();

        xhr.open('GET', url, true);

        // Set custom headers
        Object.entries(customHeaders).forEach(([key, value]) => {
            xhr.setRequestHeader(key, value);
        });

        xhr.onload = function() {
            if (xhr.status >= 200 && xhr.status < 300) {
                resolve(xhr.responseText);
            } else {
                reject(new Error(`Request failed with status: ${xhr.status}`));
            }
        };

        xhr.onerror = function() {
            reject(new Error('Network error'));
        };

        xhr.send();
    });
}

// Usage (in browsers, forbidden headers such as Referer are silently dropped;
// see "CORS and Forbidden Headers" below)
const headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Referer': 'https://google.com'
};

scrapeWithXHR('https://example.com', headers)
    .then(html => console.log(html))
    .catch(error => console.error(error));

Using Axios (Node.js)

For Node.js environments, Axios provides excellent header management:

const axios = require('axios');

// Global defaults apply to every request made through this axios import
axios.defaults.headers.common['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

// Per-request headers (wrapped in an async function, since top-level
// await is not available in CommonJS modules)
async function fetchData() {
    const response = await axios.get('https://example.com/api', {
        headers: {
            'Accept': 'application/json',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://google.com',
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': 'session_id=abc123; csrf_token=def456'
        },
        timeout: 10000 // give up if no response arrives within 10 seconds
    });

    console.log(response.data);
}

fetchData().catch(error => console.error('Request failed:', error.message));
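
For scrapers that make many requests to one site, an instance created with axios.create() keeps your defaults scoped to that scraper instead of mutating the global axios object. A minimal sketch (the base URL and path below are placeholders):

const scraper = axios.create({
    baseURL: 'https://example.com',
    timeout: 10000,
    headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept-Language': 'en-US,en;q=0.9'
    }
});

// Per-request headers merge on top of the instance defaults
async function scrapeProducts() {
    const page = await scraper.get('/products', {
        headers: { 'Referer': 'https://example.com/' }
    });
    return page.data;
}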

Common Header Patterns for Web Scraping

Browser Simulation Headers

const browserHeaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
};

API Authentication Headers

const apiHeaders = {
    'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'X-API-Key': 'your-api-key-here'
};

Anti-Bot Evasion Headers

const stealthHeaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', // must agree with the sec-ch-ua hints below
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://www.google.com/',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-User': '?1'
};
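
Each pattern above is a plain object, so you can pass it straight to fetch() or merge it with request-specific values using spread syntax (the URL and Referer here are placeholders):

const response = await fetch('https://example.com/products', {
    headers: {
        ...browserHeaders,                  // baseline browser simulation
        'Referer': 'https://example.com/'   // request-specific override
    }
});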

Important Considerations

CORS and Forbidden Headers

When running in browsers, two separate mechanisms limit which headers you can actually send:

- Forbidden header names (Referer, Host, Cookie, Connection, and others) are controlled by the browser, and attempts to set them are silently ignored; User-Agent overrides are also unreliable across browsers
- Cross-origin requests that add non-simple custom headers trigger a CORS preflight, which the target server must approve via Access-Control-Allow-Headers
- Security headers such as Origin and the Sec-Fetch-* family are managed automatically by the browser

None of these restrictions apply in Node.js, which is why server-side scraping gives you full control over request headers.
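
A quick way to verify what is actually sent is to hit an echo endpoint such as https://httpbin.org/headers, which returns the request headers it received:

const response = await fetch('https://httpbin.org/headers', {
    headers: { 'User-Agent': 'MyScraper/1.0' } // deliberately non-browser value
});

const echoed = await response.json();
// In browsers this typically prints the browser's own User-Agent;
// in Node.js 18+ it prints 'MyScraper/1.0'
console.log(echoed.headers['User-Agent']);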

Header Validation

Always validate your headers before sending:

function validateHeaders(headers) {
    const forbiddenHeaders = ['host', 'content-length', 'connection']; // a small sample of the full forbidden-name list

    for (const [key, value] of Object.entries(headers)) {
        if (forbiddenHeaders.includes(key.toLowerCase())) {
            console.warn(`Header '${key}' will be ignored by the browser`);
        }

        if (typeof value !== 'string') {
            throw new Error(`Header value for '${key}' must be a string`);
        }
    }
}
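
For example, validating a header set that contains a forbidden name surfaces the problem before the request goes out:

const candidateHeaders = {
    'Accept': 'application/json',
    'Connection': 'keep-alive' // triggers the warning above
};

validateHeaders(candidateHeaders);
const response = await fetch('https://example.com/api', { headers: candidateHeaders });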

Rate Limiting and Respect

Throttle your requests so you don't overload the target server:

// Add delays between requests
function delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

async function scrapeWithDelay(urls, headers) {
    const results = [];

    for (const url of urls) {
        try {
            const response = await fetch(url, { headers });
            results.push(await response.text());

            // Wait 1 second between requests
            await delay(1000);
        } catch (error) {
            console.error(`Failed to scrape ${url}:`, error);
        }
    }

    return results;
}

Best Practices

  1. Rotate User Agents: Use different browser signatures to avoid detection (see the sketch after this list)
  2. Set Realistic Headers: Include headers that real browsers send
  3. Handle Rate Limits: Implement delays and retry logic
  4. Check robots.txt: Respect website scraping policies
  5. Monitor Response Codes: Watch for 429 (rate limited) or 403 (forbidden) responses
  6. Use Proxies When Needed: Combine with proxy rotation for large-scale scraping
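
A minimal sketch tying several of these together: user-agent rotation combined with backoff-and-retry on 429 responses. The agent strings and retry counts are illustrative placeholders, and delay() is the helper from the rate-limiting example above.

const userAgents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
];

async function fetchWithRetry(url, retries = 3) {
    for (let attempt = 0; attempt < retries; attempt++) {
        // Rotate to a different browser signature on each attempt
        const response = await fetch(url, {
            headers: { 'User-Agent': userAgents[attempt % userAgents.length] }
        });

        if (response.status === 429) {
            // Rate limited: honor Retry-After when present, otherwise back off exponentially
            const retryAfter = Number(response.headers.get('Retry-After')) || 2 ** attempt;
            await delay(retryAfter * 1000);
            continue;
        }

        return response;
    }

    throw new Error(`Still rate limited after ${retries} attempts: ${url}`);
}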

Remember to always comply with website terms of service and applicable laws when web scraping.
