Setting custom headers in JavaScript web scraping is essential for simulating authentic browser requests, handling authentication, and bypassing basic anti-bot measures. This guide covers multiple approaches to add custom headers to your scraping requests.
Using the fetch() API (Recommended)
The modern fetch() API provides the cleanest way to set custom headers:
Plain Object Headers
const response = await fetch('https://example.com/api/data', {
  method: 'GET',
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'https://example.com',
    'Authorization': 'Bearer your-token-here',
    'X-Custom-Header': 'custom-value'
  }
});

const html = await response.text();
console.log(html);
Using the Headers Constructor
const headers = new Headers();
headers.append('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
headers.append('Accept', 'application/json');
headers.append('Authorization', 'Bearer your-token-here');

try {
  const response = await fetch('https://api.example.com/data', {
    method: 'GET',
    headers: headers
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data = await response.json();
  console.log(data);
} catch (error) {
  console.error('Fetch error:', error);
}
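The Headers constructor also accepts a plain object directly, which is more concise when the header set is static:

const headers = new Headers({
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Accept': 'application/json',
  'Authorization': 'Bearer your-token-here'
});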
Using XMLHttpRequest
For older environments or when you need more control over the request:
function scrapeWithXHR(url, customHeaders) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);

    // Set custom headers
    Object.entries(customHeaders).forEach(([key, value]) => {
      xhr.setRequestHeader(key, value);
    });

    xhr.onload = function() {
      if (xhr.status >= 200 && xhr.status < 300) {
        resolve(xhr.responseText);
      } else {
        reject(new Error(`Request failed with status: ${xhr.status}`));
      }
    };

    xhr.onerror = function() {
      reject(new Error('Network error'));
    };

    xhr.send();
  });
}
// Usage
const headers = {
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Referer': 'https://google.com'
};

scrapeWithXHR('https://example.com', headers)
  .then(html => console.log(html))
  .catch(error => console.error(error));
Using Axios (Node.js)
For Node.js environments, Axios provides excellent header management:
const axios = require('axios');

// Global defaults applied to every request
axios.defaults.headers.common['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

// Per-request headers (wrapped in an async IIFE, since CommonJS has no top-level await)
(async () => {
  const response = await axios.get('https://example.com/api', {
    headers: {
      'Accept': 'application/json',
      'Accept-Language': 'en-US,en;q=0.9',
      'Referer': 'https://google.com',
      'X-Requested-With': 'XMLHttpRequest',
      'Cookie': 'session_id=abc123; csrf_token=def456'
    },
    timeout: 10000
  });

  console.log(response.data);
})();
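If you hit the same site repeatedly, it is often cleaner to build a pre-configured instance with axios.create() so every request inherits the same defaults. A minimal sketch; the base URL and path here are placeholders:

const client = axios.create({
  baseURL: 'https://example.com',
  timeout: 10000,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'application/json'
  }
});

// Per-request headers are merged with the instance defaults
client.get('/api', { headers: { 'Referer': 'https://google.com' } })
  .then(response => console.log(response.data))
  .catch(error => console.error(error));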
Common Header Patterns for Web Scraping
Browser Simulation Headers
const browserHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Accept-Encoding': 'gzip, deflate, br',
  'DNT': '1',
  'Connection': 'keep-alive',
  'Upgrade-Insecure-Requests': '1'
};
API Authentication Headers
const apiHeaders = {
  'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
  'Content-Type': 'application/json',
  'Accept': 'application/json',
  'X-API-Key': 'your-api-key-here'
};
Anti-Bot Evasion Headers
const stealthHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Accept-Encoding': 'gzip, deflate',
  'Referer': 'https://www.google.com/',
  'Connection': 'keep-alive',
  'Cache-Control': 'max-age=0',
  'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"Windows"',
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'cross-site',
  'Sec-Fetch-User': '?1'
};
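These header sets are plain objects, so they can be passed straight into any of the approaches above. A quick sketch using fetch (the URL is a placeholder):

const response = await fetch('https://example.com/products', {
  headers: browserHeaders
});
const html = await response.text();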
Important Considerations
CORS Limitations
When running in browsers, CORS policies and the Fetch specification's forbidden header names restrict what you can set (Node.js behaves differently, as shown after this list):
- User-Agent cannot be modified in most browser environments
- Referer is controlled by the browser
- Some security headers (such as Host, Origin, and Content-Length) are managed automatically
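In Node.js there is no CORS layer, so these restrictions largely disappear. A minimal sketch, assuming Node.js 18+ where fetch is built in:

// Node.js: headers like User-Agent and Referer can be set freely
const response = await fetch('https://example.com', {
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Referer': 'https://www.google.com/'
  }
});
console.log(response.status);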
Header Validation
Always validate your headers before sending:
function validateHeaders(headers) {
  const forbiddenHeaders = ['host', 'content-length', 'connection'];

  for (const [key, value] of Object.entries(headers)) {
    if (forbiddenHeaders.includes(key.toLowerCase())) {
      console.warn(`Header '${key}' will be ignored by the browser`);
    }
    if (typeof value !== 'string') {
      throw new Error(`Header value for '${key}' must be a string`);
    }
  }
}
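For example, this call warns about the forbidden Connection header and then throws on the non-string value:

validateHeaders({
  'Connection': 'keep-alive', // triggers the console warning
  'X-Retry-Count': 3          // not a string, so this throws
});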
Rate Limiting and Respect
// Add delays between requests
function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

async function scrapeWithDelay(urls, headers) {
  const results = [];

  for (const url of urls) {
    try {
      const response = await fetch(url, { headers });
      results.push(await response.text());

      // Wait 1 second between requests
      await delay(1000);
    } catch (error) {
      console.error(`Failed to scrape ${url}:`, error);
    }
  }

  return results;
}
Best Practices
- Rotate User Agents: Use different browser signatures to avoid detection
- Set Realistic Headers: Include headers that real browsers send
- Handle Rate Limits: Implement delays and retry logic (see the sketch after this list)
- Check robots.txt: Respect website scraping policies
- Monitor Response Codes: Watch for 429 (rate limited) or 403 (forbidden) responses
- Use Proxies When Needed: Combine with proxy rotation for large-scale scraping
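Several of these practices combine naturally into one helper. A minimal sketch reusing the delay() function from the rate-limiting section; the user-agent pool, retry count, and backoff delays are illustrative, not prescriptive:

const userAgents = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
];

// Rotate the User-Agent and back off exponentially on 429/403
async function fetchWithRetry(url, headers, retries = 3) {
  for (let attempt = 0; attempt < retries; attempt++) {
    const response = await fetch(url, {
      headers: {
        ...headers,
        'User-Agent': userAgents[attempt % userAgents.length]
      }
    });

    if (response.ok) return response.text();

    if (response.status === 429 || response.status === 403) {
      await delay(1000 * 2 ** attempt); // 1s, 2s, 4s, ...
      continue;
    }

    throw new Error(`Request failed with status: ${response.status}`);
  }
  throw new Error(`Giving up on ${url} after ${retries} attempts`);
}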
Remember to always comply with website terms of service and applicable laws when web scraping.