Running multiple pages in parallel with Puppeteer can be achieved by managing multiple browser instances or pages simultaneously. Here's how we can do it:
Python
In Python, we can use asyncio together with Pyppeteer, an unofficial Python port of the Puppeteer JavaScript library. Here is a simple example:
import asyncio
from pyppeteer import launch

async def get_page_content(url):
    # Launch a browser, fetch the page, grab its HTML, then clean up
    browser = await launch()
    page = await browser.newPage()
    await page.goto(url)
    content = await page.content()
    await browser.close()
    return content

async def main():
    urls = ['http://example.com', 'http://example2.com', 'http://example3.com']
    tasks = []
    for url in urls:
        tasks.append(asyncio.ensure_future(get_page_content(url)))
    # Run all scraping tasks concurrently and collect the results
    pages_content = await asyncio.gather(*tasks)
    return pages_content

# Run the asyncio event loop
asyncio.get_event_loop().run_until_complete(main())
In the above example, we create an asynchronous function get_page_content which launches a browser, navigates to a URL, retrieves the page content, and then closes the browser. In the main function, we create a task for each URL we want to scrape and use asyncio.gather to run those tasks concurrently. Note that each task launches its own browser instance, which keeps the tasks fully independent but is relatively heavy on resources.
JavaScript
In JavaScript, you can use Promise.all to run multiple pages in parallel with Puppeteer. Here's an example:
const puppeteer = require('puppeteer');

async function run() {
  const browser = await puppeteer.launch();

  // Open two tabs in the same browser instance
  const page1 = await browser.newPage();
  const page2 = await browser.newPage();

  // Navigate both pages in parallel
  await Promise.all([
    page1.goto('http://example.com'),
    page2.goto('http://example2.com'),
  ]);

  // Do something with the pages...
  // ...

  await browser.close();
}

run().catch(console.error);
In the example above, Promise.all is used to run the goto calls in parallel, so Puppeteer navigates to 'http://example.com' and 'http://example2.com' at the same time within a single browser instance.
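The same pattern extends from two hard-coded pages to a whole list of URLs. The following is just a sketch (scrapeTitles and the URL list are placeholders, not part of Puppeteer's API): it opens one tab per URL in a single browser, navigates them all in parallel with Promise.all, and collects each page's title.

const puppeteer = require('puppeteer');

async function scrapeTitles(urls) {
  const browser = await puppeteer.launch();

  // One tab per URL, navigated and scraped in parallel
  const titles = await Promise.all(urls.map(async (url) => {
    const page = await browser.newPage();
    await page.goto(url);
    const title = await page.title();
    await page.close();
    return title;
  }));

  await browser.close();
  return titles;
}

// Example usage with placeholder URLs
scrapeTitles(['http://example.com', 'http://example2.com', 'http://example3.com'])
  .then(console.log)
  .catch(console.error);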
Please note that while this approach does allow running operations in parallel, it may not be suitable for a very large number of pages, as each open page consumes memory and CPU. For scraping a large number of pages, consider using a task queue or a pool of browser instances/pages so that only a bounded number of pages are open at once; a sketch of that idea follows below.
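As a rough illustration of that idea, here is one possible sketch of a bounded worker pool (scrapeAll, CONCURRENCY, and the URLs are placeholders, not an established API): a single browser stays open, a fixed number of worker loops pull URLs from a shared queue, and at most CONCURRENCY tabs are ever open at the same time.

const puppeteer = require('puppeteer');

const CONCURRENCY = 3; // maximum number of tabs open at once

async function scrapeAll(urls) {
  const browser = await puppeteer.launch();
  const queue = [...urls];  // shared work queue
  const results = [];

  // Each worker takes the next URL, scrapes it in its own tab, then repeats
  async function worker() {
    while (queue.length > 0) {
      const url = queue.shift();
      const page = await browser.newPage();
      try {
        await page.goto(url);
        results.push({ url, title: await page.title() });
      } finally {
        await page.close();
      }
    }
  }

  // Start a bounded pool of workers and wait until the queue is drained
  await Promise.all(Array.from({ length: CONCURRENCY }, () => worker()));

  await browser.close();
  return results;
}

scrapeAll(['http://example.com', 'http://example2.com', 'http://example3.com'])
  .then(console.log)
  .catch(console.error);

Libraries such as puppeteer-cluster package up this kind of pooling if you would rather not maintain it yourself.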