Table of contents

How to handle multiple browser instances in Puppeteer?

How to Handle Multiple Browser Instances in Puppeteer

Managing multiple browser instances in Puppeteer is essential for parallel processing, user session simulation, and load testing. This guide covers different approaches with practical examples.

Multiple Browser Instances vs Multiple Pages

Separate Browser Instances

Use separate browser instances when you need complete isolation between sessions:

const puppeteer = require('puppeteer');

/**
 * Launches three fully isolated browser instances, navigates each to a
 * different URL in parallel, and logs the page titles.
 *
 * Fix vs. original: browsers are now closed in a `finally` block, so they
 * are not leaked when navigation or scraping throws after launch.
 */
async function multipleBrowsers() {
    // Track every browser that actually launched so cleanup always runs.
    const browsers = [];

    try {
        // Launch the instances in parallel; allSettled lets us keep (and
        // later close) the ones that succeeded even if another launch fails.
        const launches = await Promise.allSettled([
            puppeteer.launch({ headless: 'new' }),
            puppeteer.launch({ headless: 'new' }),
            puppeteer.launch({ headless: 'new' })
        ]);
        for (const result of launches) {
            if (result.status === 'fulfilled') {
                browsers.push(result.value);
            }
        }
        const failure = launches.find((result) => result.status === 'rejected');
        if (failure) {
            throw failure.reason;
        }

        // One page per browser.
        const pages = await Promise.all(browsers.map((browser) => browser.newPage()));

        // Navigate to different URLs in parallel.
        const urls = ['https://example1.com', 'https://example2.com', 'https://example3.com'];
        await Promise.all(pages.map((page, i) => page.goto(urls[i])));

        // Read the titles in parallel.
        const results = await Promise.all(pages.map((page) => page.title()));
        console.log('Page titles:', results);
    } catch (error) {
        console.error('Error:', error);
    } finally {
        // The original only closed browsers on the success path, leaking
        // the processes whenever anything above threw.
        await Promise.all(browsers.map((browser) => browser.close()));
    }
}

multipleBrowsers();

Multiple Pages in One Browser

Use multiple pages within a single browser for lighter resource usage:

const puppeteer = require('puppeteer');

/**
 * Opens several tabs (pages) in a single browser process — lighter on
 * resources than separate browsers — and scrapes basic data from each.
 */
async function multiplePages() {
    const browser = await puppeteer.launch({ headless: 'new' });

    try {
        // Create multiple pages (tabs) that share one browser process.
        const pages = await Promise.all([
            browser.newPage(),
            browser.newPage(),
            browser.newPage()
        ]);

        // Configure all pages in parallel; each page's settings are independent,
        // so there is no need to await them one at a time.
        await Promise.all(pages.map(async (page) => {
            await page.setViewport({ width: 1280, height: 720 });
            await page.setUserAgent('Mozilla/5.0 (compatible; Web Scraper)');
        }));

        // Navigate every page to its own URL in parallel.
        const urls = ['https://example1.com', 'https://example2.com', 'https://example3.com'];
        await Promise.all(
            pages.map((page, index) => page.goto(urls[index]))
        );

        // Extract data from all pages concurrently.
        const data = await Promise.all(
            pages.map(async (page) => ({
                url: page.url(),
                title: await page.title(),
                content: await page.$eval('body', el => el.textContent.slice(0, 100))
            }))
        );

        console.log('Scraped data:', data);
    } finally {
        // Closing the browser also closes all of its pages, even on error.
        await browser.close();
    }
}

// Surface failures instead of leaving an unhandled promise rejection.
multiplePages().catch((error) => console.error('Error:', error));

Advanced Use Cases

Load Testing with Multiple Browsers

const puppeteer = require('puppeteer');

/**
 * Runs a simple load test: one isolated browser per simulated user, all
 * hitting `targetUrl` concurrently.
 *
 * @param {string} targetUrl - URL every simulated user navigates to.
 * @param {number} [concurrentUsers=5] - Number of parallel browser sessions.
 * @returns {Promise<Array<{user: number, loadTime: number, success: boolean,
 *          title?: string, error?: string}>>} one timing record per user.
 */
async function loadTest(targetUrl, concurrentUsers = 5) {
    const browsers = [];

    try {
        // Launch sequentially and push each browser as it comes up, so the
        // finally-block can close whatever launched even if a later launch fails.
        for (let i = 0; i < concurrentUsers; i++) {
            const browser = await puppeteer.launch({
                headless: 'new',
                args: ['--no-sandbox', '--disable-dev-shm-usage']
            });
            browsers.push(browser);
        }

        // Simulate concurrent user sessions; each records its own timing.
        const results = await Promise.all(browsers.map(async (browser, index) => {
            const page = await browser.newPage();
            const startTime = Date.now();

            try {
                await page.goto(targetUrl, { waitUntil: 'networkidle0' });

                return {
                    user: index + 1,
                    loadTime: Date.now() - startTime,
                    success: true,
                    title: await page.title()
                };
            } catch (error) {
                // A failed navigation is a data point, not a fatal error.
                return {
                    user: index + 1,
                    loadTime: Date.now() - startTime,
                    success: false,
                    error: error.message
                };
            }
        }));

        console.log('Load test results:', results);
        return results;
    } finally {
        // Clean up all browsers (closing a browser closes its pages too).
        await Promise.all(browsers.map(browser => browser.close()));
    }
}

loadTest('https://example.com', 10).catch((error) => console.error('Load test failed:', error));

User Session Simulation

const puppeteer = require('puppeteer');

class UserSession {
    /**
     * Encapsulates one isolated user session: its own browser process and
     * on-disk profile, so cookies and storage never leak between users.
     *
     * @param {{id: number|string, username?: string, password?: string,
     *          viewport: {width: number, height: number},
     *          cookies?: object[]}} userConfig - per-user settings.
     */
    constructor(userConfig) {
        this.config = userConfig;
        this.browser = null;
        this.page = null;
    }

    /** Launches the browser with a per-user profile and prepares the page. */
    async initialize() {
        this.browser = await puppeteer.launch({
            headless: 'new',
            // A separate profile directory per user keeps sessions isolated
            // across runs (cookies, localStorage, cache).
            userDataDir: `./user-data-${this.config.id}`
        });

        this.page = await this.browser.newPage();
        await this.page.setViewport(this.config.viewport);

        if (this.config.cookies) {
            await this.page.setCookie(...this.config.cookies);
        }
    }

    /** Logs in through the site's form and waits for the post-login navigation. */
    async login() {
        await this.page.goto('https://example.com/login');
        await this.page.type('#username', this.config.username);
        await this.page.type('#password', this.config.password);
        await this.page.click('#login-button');
        await this.page.waitForNavigation();
    }

    /**
     * Simulates user behavior on the dashboard.
     * @returns {Promise<{url: string, timestamp: string}>} where the session ended up.
     */
    async performActions() {
        await this.page.goto('https://example.com/dashboard');
        await this.page.click('.menu-item');
        // page.waitForTimeout() was deprecated and removed in Puppeteer v22+;
        // a plain setTimeout-backed promise is the supported way to pause.
        await new Promise((resolve) => setTimeout(resolve, 2000));

        return await this.page.evaluate(() => ({
            url: window.location.href,
            timestamp: new Date().toISOString()
        }));
    }

    /** Closes the browser. Idempotent: calling it twice is a no-op. */
    async cleanup() {
        if (this.browser) {
            await this.browser.close();
            // Drop references so a second cleanup() does nothing.
            this.browser = null;
            this.page = null;
        }
    }
}

/**
 * Drives three independent UserSession instances through
 * initialize → login → performActions in parallel, and always tears
 * every session down afterwards.
 */
async function simulateMultipleUsers() {
    const users = [
        { id: 1, username: 'user1@example.com', password: 'pass1', viewport: { width: 1280, height: 720 } },
        { id: 2, username: 'user2@example.com', password: 'pass2', viewport: { width: 1920, height: 1080 } },
        { id: 3, username: 'user3@example.com', password: 'pass3', viewport: { width: 1366, height: 768 } }
    ];

    const sessions = users.map(config => new UserSession(config));

    try {
        // Initialize all sessions in parallel.
        await Promise.all(sessions.map(session => session.initialize()));

        // Log every user in concurrently.
        await Promise.all(sessions.map(session => session.login()));

        // Execute the scripted actions and gather the results.
        const results = await Promise.all(sessions.map(session => session.performActions()));

        console.log('User session results:', results);
    } finally {
        // Tear down every session even if a step above failed.
        await Promise.all(sessions.map(session => session.cleanup()));
    }
}

// Surface failures instead of leaving an unhandled promise rejection.
simulateMultipleUsers().catch((error) => console.error('Simulation failed:', error));

Best Practices

Resource Management

const puppeteer = require('puppeteer');

class BrowserPool {
    /**
     * Bounded pool of reusable browser instances.
     *
     * Fixes vs. original:
     *  - a pending-launch counter prevents exceeding `maxBrowsers` when
     *    several callers reach the launch path concurrently (the old
     *    length check ran before an `await`, so all of them could pass it);
     *  - saturated callers park on a promise queue that `releaseBrowser`
     *    resolves, instead of polling every 100 ms.
     *
     * @param {number} [maxBrowsers=5] - Hard cap on live browser instances.
     */
    constructor(maxBrowsers = 5) {
        this.maxBrowsers = maxBrowsers;
        this.browsers = [];
        this.inUse = new Set();
        this.pendingLaunches = 0; // launches in flight, counted toward the cap
        this.waiters = [];        // resolvers for callers awaiting a release
    }

    /**
     * Returns an idle browser, launching a new one while under the cap,
     * otherwise waiting until one is released.
     * @returns {Promise<object>} a pooled Puppeteer Browser instance.
     */
    async getBrowser() {
        // Reuse an idle browser if one exists.
        const idle = this.browsers.find((b) => !this.inUse.has(b));
        if (idle) {
            this.inUse.add(idle);
            return idle;
        }

        // Launch a new browser if the cap allows. Reserve the slot before
        // awaiting so concurrent callers cannot all pass the check.
        if (this.browsers.length + this.pendingLaunches < this.maxBrowsers) {
            this.pendingLaunches += 1;
            try {
                const browser = await puppeteer.launch({
                    headless: 'new',
                    args: ['--no-sandbox', '--disable-dev-shm-usage']
                });
                this.browsers.push(browser);
                this.inUse.add(browser);
                return browser;
            } finally {
                this.pendingLaunches -= 1;
            }
        }

        // Pool saturated: sleep until releaseBrowser() wakes us, then retry.
        await new Promise((resolve) => this.waiters.push(resolve));
        return this.getBrowser();
    }

    /**
     * Marks a browser as idle again and wakes one waiting caller, if any.
     * @param {object} browser - a browser previously returned by getBrowser().
     */
    async releaseBrowser(browser) {
        this.inUse.delete(browser);
        const wake = this.waiters.shift();
        if (wake) {
            wake();
        }
    }

    /** Closes every pooled browser and resets the pool. */
    async closeAll() {
        await Promise.all(this.browsers.map(browser => browser.close()));
        this.browsers = [];
        this.inUse.clear();
        // Wake any parked callers so they do not hang on a dead pool.
        while (this.waiters.length > 0) {
            this.waiters.shift()();
        }
    }
}

// Usage example
/**
 * Demonstrates the pool: ten tasks share at most three browsers; each task
 * borrows a browser, scrapes one page, and returns the browser to the pool.
 */
async function useBrowserPool() {
    const pool = new BrowserPool(3);

    try {
        const tasks = Array.from({ length: 10 }, async (_, i) => {
            const browser = await pool.getBrowser();
            const page = await browser.newPage();

            try {
                await page.goto(`https://example.com/page${i}`);
                const title = await page.title();
                return { page: i, title };
            } finally {
                // Close the page but keep the browser alive for reuse.
                await page.close();
                await pool.releaseBrowser(browser);
            }
        });

        const results = await Promise.all(tasks);
        console.log('Results:', results);
    } finally {
        await pool.closeAll();
    }
}

// Surface failures instead of leaving an unhandled promise rejection.
useBrowserPool().catch((error) => console.error('Pool example failed:', error));

When to Use Each Approach

  • Multiple Browser Instances: Use for complete isolation, different user sessions, or when dealing with authentication
  • Multiple Pages: Use for faster execution with shared resources when isolation isn't critical
  • Browser Pooling: Use for high-throughput applications to manage resource usage effectively

Performance Considerations

  • Each browser instance typically uses on the order of 100 MB of RAM or more, depending on the pages it loads
  • Multiple pages share browser process resources
  • Use Promise.all() for parallel execution
  • Implement proper error handling and cleanup
  • Monitor system resources to avoid overwhelming the host

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"

Try in request builder

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon