How to Handle AJAX Requests in Puppeteer

Puppeteer provides powerful capabilities for handling AJAX requests, which is essential for scraping dynamic websites that load content asynchronously. This guide covers monitoring, intercepting, and waiting for AJAX requests.

Basic AJAX Request Monitoring

1. Setup and Installation

npm install puppeteer

2. Monitor All AJAX Requests

const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Enable request interception (only needed when aborting or modifying
    // requests; once enabled, every request must be continued or aborted)
    await page.setRequestInterception(true);

    // Monitor AJAX requests
    page.on('request', (request) => {
        if (request.resourceType() === 'xhr' || request.resourceType() === 'fetch') {
            console.log('AJAX Request:', {
                url: request.url(),
                method: request.method(),
                headers: request.headers(),
                postData: request.postData()
            });
        }
        request.continue();
    });

    // Monitor responses
    page.on('response', (response) => {
        if (response.request().resourceType() === 'xhr' || response.request().resourceType() === 'fetch') {
            console.log('AJAX Response:', {
                url: response.url(),
                status: response.status(),
                headers: response.headers()
            });
        }
    });

    await page.goto('https://example.com');
    // waitForTimeout was removed in recent Puppeteer versions; use a plain delay to wait for AJAX requests
    await new Promise((resolve) => setTimeout(resolve, 5000));
    await browser.close();
})();

Intercepting and Modifying AJAX Requests

Block Specific AJAX Requests

page.on('request', (request) => {
    if (request.resourceType() === 'xhr' && request.url().includes('analytics')) {
        // Block analytics requests
        request.abort();
    } else if (request.resourceType() === 'xhr') {
        console.log('Allowing AJAX request:', request.url());
        request.continue();
    } else {
        request.continue();
    }
});

Modify Request Headers

page.on('request', (request) => {
    if (request.resourceType() === 'xhr') {
        request.continue({
            headers: {
                ...request.headers(),
                'Authorization': 'Bearer your-token',
                'Custom-Header': 'custom-value'
            }
        });
    } else {
        request.continue();
    }
});
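
Mock AJAX Responses

Beyond changing headers, an intercepted request can be answered without reaching the server at all. A minimal sketch using request.respond(), assuming a hypothetical /api/config endpoint you want to stub out:

page.on('request', (request) => {
    if (request.resourceType() === 'xhr' && request.url().includes('/api/config')) {
        // Fulfill the request with stubbed JSON instead of hitting the server
        request.respond({
            status: 200,
            contentType: 'application/json',
            body: JSON.stringify({ featureFlags: { darkMode: true } })
        });
    } else {
        request.continue();
    }
});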

Waiting for AJAX Requests

Wait for Specific Network Activity

const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Wait for specific AJAX endpoint
    const responsePromise = page.waitForResponse(response => 
        response.url().includes('/api/data') && response.status() === 200
    );

    await page.goto('https://example.com');

    // Trigger AJAX request
    await page.click('#load-data-button');

    // Wait for the response
    const response = await responsePromise;
    const data = await response.json();
    console.log('AJAX data:', data);

    await browser.close();
})();
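
Puppeteer also offers waitForRequest(), the counterpart of waitForResponse(), for cases where you care that a call was fired rather than what it returned. A short sketch against the same hypothetical /api/data endpoint and button:

const requestPromise = page.waitForRequest(request =>
    request.url().includes('/api/data') && request.method() === 'POST'
);

await page.click('#load-data-button');

const request = await requestPromise;
console.log('Request fired with body:', request.postData());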

Wait for Network Idle

// Wait until there are no network connections for at least 500ms
await page.goto('https://example.com', { 
    waitUntil: 'networkidle0' 
});

// Or tolerate up to 2 lingering connections (long-polling, analytics beacons)
await page.goto('https://example.com', { 
    waitUntil: 'networkidle2' 
});
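
The waitUntil option only applies to navigations. For AJAX activity triggered after the page has loaded, recent Puppeteer versions also provide page.waitForNetworkIdle(); a minimal sketch, assuming a hypothetical #load-more button:

// Wait until the network has been quiet after a user action
await page.click('#load-more');
await page.waitForNetworkIdle({
    idleTime: 500,   // how long the network must stay quiet
    timeout: 30000   // give up after 30 seconds
});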

Advanced AJAX Handling Techniques

Capture AJAX Response Data

const capturedData = {};

page.on('response', async (response) => {
    if (response.request().resourceType() === 'xhr' && 
        response.url().includes('/api/')) {
        try {
            const data = await response.json();
            capturedData[response.url()] = data;
            console.log('Captured AJAX data:', data);
        } catch (error) {
            console.log('Non-JSON response from:', response.url());
        }
    }
});

Wait for Multiple AJAX Requests

async function waitForMultipleRequests(page, urls, timeout = 30000) {
    const promises = urls.map(url => 
        page.waitForResponse(response => 
            response.url().includes(url) && response.status() === 200,
            { timeout }
        )
    );

    try {
        const responses = await Promise.all(promises);
        return responses;
    } catch (error) {
        console.log('Some requests timed out:', error.message);
        return [];
    }
}

// Usage
const responses = await waitForMultipleRequests(page, [
    '/api/user',
    '/api/posts',
    '/api/comments'
]);
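
Note that Promise.all() fails fast: a single timeout discards the responses that did arrive. If partial results are acceptable, Promise.allSettled() keeps them; a sketch of a variant (waitForAvailableRequests is a hypothetical name) under the same assumptions:

async function waitForAvailableRequests(page, urls, timeout = 30000) {
    const results = await Promise.allSettled(urls.map(url =>
        page.waitForResponse(response =>
            response.url().includes(url) && response.status() === 200,
            { timeout }
        )
    ));

    // Keep only the responses that arrived before the timeout
    return results
        .filter(result => result.status === 'fulfilled')
        .map(result => result.value);
}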

Handle Dynamic Content Loading

async function waitForDynamicContent(page, selector, maxWait = 10000) {
    const startTime = Date.now();

    while (Date.now() - startTime < maxWait) {
        const element = await page.$(selector);
        if (element) {
            const text = await element.evaluate(el => el.textContent);
            if (text && text.trim() !== 'Loading...') {
                return text;
            }
        }
        await new Promise((resolve) => setTimeout(resolve, 100)); // poll every 100ms
    }

    throw new Error(`Content not loaded within ${maxWait}ms`);
}

// Usage
await page.goto('https://example.com');
await page.click('#load-content');
const content = await waitForDynamicContent(page, '#dynamic-content');
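
The polling loop above re-implements what Puppeteer's built-in page.waitForFunction() already does inside the browser context. A sketch of the equivalent, using the same hypothetical #dynamic-content selector:

// Resolves once the element exists and no longer shows the loading placeholder
const handle = await page.waitForFunction(
    (selector) => {
        const el = document.querySelector(selector);
        return el && el.textContent.trim() !== 'Loading...'
            ? el.textContent
            : null; // falsy return keeps polling
    },
    { timeout: 10000, polling: 100 },
    '#dynamic-content'
);
const content = await handle.jsonValue();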

Complete Example: E-commerce Product Scraper

const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();

    const products = [];

    // Intercept product data AJAX requests
    page.on('response', async (response) => {
        if (response.url().includes('/api/products') && response.status() === 200) {
            try {
                const data = await response.json();
                products.push(...data.products);
                console.log(`Captured ${data.products.length} products`);
            } catch (error) {
                console.log('Error parsing product data:', error);
            }
        }
    });

    await page.goto('https://example-shop.com');

    // Load more products by scrolling
    let previousHeight = 0;
    while (true) {
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));

        // Wait for new content to load (fixed delay; page.waitForNetworkIdle() is more robust)
        await new Promise((resolve) => setTimeout(resolve, 2000));

        const currentHeight = await page.evaluate(() => document.body.scrollHeight);
        if (currentHeight === previousHeight) break;

        previousHeight = currentHeight;
    }

    console.log(`Total products captured: ${products.length}`);
    await browser.close();
})();

Best Practices

  1. Always handle errors when parsing AJAX responses
  2. Set appropriate timeouts to avoid hanging scripts
  3. Use networkidle0 or networkidle2 for pages with heavy AJAX activity
  4. Monitor both XHR and Fetch requests for complete coverage
  5. Be respectful of rate limits when intercepting requests
  6. Clean up resources by closing browsers properly, as in the sketch below
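
A minimal skeleton tying several of these together: a default timeout so calls fail fast, error handling around the scrape, and a try/finally that closes the browser even when something throws.

const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        page.setDefaultTimeout(30000); // fail fast instead of hanging
        await page.goto('https://example.com', { waitUntil: 'networkidle2' });
        // ... scraping and AJAX handling logic ...
    } catch (error) {
        console.error('Scrape failed:', error.message);
    } finally {
        await browser.close(); // always release the browser process
    }
})();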

This comprehensive approach to AJAX handling will help you effectively scrape dynamic websites and capture asynchronous data loading patterns.

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"
