Setting custom headers in JavaScript web scraping is essential for simulating authentic browser requests, handling authentication, and bypassing basic anti-bot measures. This guide covers multiple approaches to add custom headers to your scraping requests.
Using the fetch() API (Recommended)
The modern fetch() API provides the cleanest way to set custom headers:
Plain Object Headers
const response = await fetch('https://example.com/api/data', {
  method: 'GET',
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'https://example.com',
    'Authorization': 'Bearer your-token-here',
    'X-Custom-Header': 'custom-value'
  }
});

const html = await response.text();
console.log(html);
Using the Headers Constructor
const headers = new Headers();
headers.append('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
headers.append('Accept', 'application/json');
headers.append('Authorization', 'Bearer your-token-here');

try {
  const response = await fetch('https://api.example.com/data', {
    method: 'GET',
    headers: headers
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data = await response.json();
  console.log(data);
} catch (error) {
  console.error('Fetch error:', error);
}
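The Headers constructor also accepts a plain object directly, which is more concise when the header set is static:

const headers = new Headers({
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Accept': 'application/json',
  'Authorization': 'Bearer your-token-here'
});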
Using XMLHttpRequest
For older environments or when you need more control over the request:
function scrapeWithXHR(url, customHeaders) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);

    // Set custom headers
    Object.entries(customHeaders).forEach(([key, value]) => {
      xhr.setRequestHeader(key, value);
    });

    xhr.onload = function() {
      if (xhr.status >= 200 && xhr.status < 300) {
        resolve(xhr.responseText);
      } else {
        reject(new Error(`Request failed with status: ${xhr.status}`));
      }
    };

    xhr.onerror = function() {
      reject(new Error('Network error'));
    };

    xhr.send();
  });
}
// Usage
const headers = {
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Referer': 'https://google.com'
};

scrapeWithXHR('https://example.com', headers)
  .then(html => console.log(html))
  .catch(error => console.error(error));
Using Axios (Node.js)
For Node.js environments, Axios provides excellent header management:
const axios = require('axios');

// Global defaults applied to every request
axios.defaults.headers.common['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

// Per-request headers (wrapped in an async IIFE, since CommonJS has no top-level await)
(async () => {
  const response = await axios.get('https://example.com/api', {
    headers: {
      'Accept': 'application/json',
      'Accept-Language': 'en-US,en;q=0.9',
      'Referer': 'https://google.com',
      'X-Requested-With': 'XMLHttpRequest',
      'Cookie': 'session_id=abc123; csrf_token=def456'
    },
    timeout: 10000
  });

  console.log(response.data);
})();
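If you hit the same site repeatedly, it is often cleaner to build a pre-configured instance with axios.create() so every request inherits the same defaults. A minimal sketch; the base URL and path here are placeholders:

const client = axios.create({
  baseURL: 'https://example.com',
  timeout: 10000,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'application/json'
  }
});

// Per-request headers are merged with the instance defaults
client.get('/api', { headers: { 'Referer': 'https://google.com' } })
  .then(response => console.log(response.data))
  .catch(error => console.error(error));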
Common Header Patterns for Web Scraping
Browser Simulation Headers
const browserHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Accept-Encoding': 'gzip, deflate, br',
  'DNT': '1',
  'Connection': 'keep-alive',
  'Upgrade-Insecure-Requests': '1'
};
API Authentication Headers
const apiHeaders = {
  'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
  'Content-Type': 'application/json',
  'Accept': 'application/json',
  'X-API-Key': 'your-api-key-here'
};
Anti-Bot Evasion Headers
const stealthHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Accept-Encoding': 'gzip, deflate',
  'Referer': 'https://www.google.com/',
  'Connection': 'keep-alive',
  'Cache-Control': 'max-age=0',
  'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"Windows"',
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'cross-site',
  'Sec-Fetch-User': '?1'
};
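These header sets are plain objects, so they can be passed straight into any of the approaches above. A quick sketch using fetch (the URL is a placeholder):

const response = await fetch('https://example.com/products', {
  headers: browserHeaders
});
const html = await response.text();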
Important Considerations
CORS Limitations
When running in browsers, CORS policies and the Fetch specification's forbidden header names restrict what you can set (Node.js behaves differently, as shown after this list):
- User-Agent cannot be modified in most browser environments
- Referer is controlled by the browser
- Some security headers (such as Host, Origin, and Content-Length) are managed automatically
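In Node.js there is no CORS layer, so these restrictions largely disappear. A minimal sketch, assuming Node.js 18+ where fetch is built in:

// Node.js: headers like User-Agent and Referer can be set freely
const response = await fetch('https://example.com', {
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Referer': 'https://www.google.com/'
  }
});
console.log(response.status);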
Header Validation
Always validate your headers before sending:
function validateHeaders(headers) {
  const forbiddenHeaders = ['host', 'content-length', 'connection'];

  for (const [key, value] of Object.entries(headers)) {
    if (forbiddenHeaders.includes(key.toLowerCase())) {
      console.warn(`Header '${key}' will be ignored by the browser`);
    }
    if (typeof value !== 'string') {
      throw new Error(`Header value for '${key}' must be a string`);
    }
  }
}
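For example, this call warns about the forbidden Connection header and then throws on the non-string value:

validateHeaders({
  'Connection': 'keep-alive', // triggers the console warning
  'X-Retry-Count': 3          // not a string, so this throws
});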
Rate Limiting and Respect
// Add delays between requests
function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

async function scrapeWithDelay(urls, headers) {
  const results = [];

  for (const url of urls) {
    try {
      const response = await fetch(url, { headers });
      results.push(await response.text());

      // Wait 1 second between requests
      await delay(1000);
    } catch (error) {
      console.error(`Failed to scrape ${url}:`, error);
    }
  }

  return results;
}
Best Practices
- Rotate User Agents: Use different browser signatures to avoid detection
- Set Realistic Headers: Include headers that real browsers send
- Handle Rate Limits: Implement delays and retry logic (see the sketch after this list)
- Check robots.txt: Respect website scraping policies
- Monitor Response Codes: Watch for 429 (rate limited) or 403 (forbidden) responses
- Use Proxies When Needed: Combine with proxy rotation for large-scale scraping
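Several of these practices combine naturally into one helper. A minimal sketch reusing the delay() function from the rate-limiting section; the user-agent pool, retry count, and backoff delays are illustrative, not prescriptive:

const userAgents = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
];

// Rotate the User-Agent and back off exponentially on 429/403
async function fetchWithRetry(url, headers, retries = 3) {
  for (let attempt = 0; attempt < retries; attempt++) {
    const response = await fetch(url, {
      headers: {
        ...headers,
        'User-Agent': userAgents[attempt % userAgents.length]
      }
    });

    if (response.ok) return response.text();

    if (response.status === 429 || response.status === 403) {
      await delay(1000 * 2 ** attempt); // 1s, 2s, 4s, ...
      continue;
    }

    throw new Error(`Request failed with status: ${response.status}`);
  }
  throw new Error(`Giving up on ${url} after ${retries} attempts`);
}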
Remember to always comply with website terms of service and applicable laws when web scraping.