How do I use the Playwright API?

Playwright is a powerful Node.js library that provides a unified API to automate Chromium, Firefox, and WebKit browsers. It enables developers to control browsers programmatically for web scraping, automated testing, generating PDFs, taking screenshots, and more.

Installation

Install Playwright and browser binaries:

# Install Playwright
npm install playwright

# Or install a single-browser package instead (pick the one you need)
npm install playwright-chromium
npm install playwright-firefox
npm install playwright-webkit

For system-wide installation:

# Install the Playwright Test runner (which includes the CLI) globally
npm install -g @playwright/test

# Install browser binaries
npx playwright install

Basic Usage

Simple Browser Automation

const { chromium } = require('playwright');

// Launch Chromium, take a screenshot of example.com, and always shut the
// browser down afterwards.
(async () => {
  // Launch browser (headless: false opens a visible window)
  const browser = await chromium.launch({ headless: false });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    // Navigate and interact
    await page.goto('https://example.com');
    await page.screenshot({ path: 'example.png' });
  } finally {
    // Runs even if navigation or the screenshot fails, so the browser
    // is not left open.
    await browser.close();
  }
})();

Cross-Browser Support

const { chromium, firefox, webkit } = require('playwright');

/**
 * Launches the given browser engine, opens https://example.com and logs
 * "<engine name>: <page title>" to the console.
 *
 * @param {object} browserType - A Playwright browser type (chromium, firefox
 *   or webkit); it must provide `launch()` and `name()`.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws Rethrows any error raised while launching, navigating or reading
 *   the title.
 */
async function runTest(browserType) {
  const browser = await browserType.launch();
  try {
    const page = await browser.newPage();

    await page.goto('https://example.com');
    const title = await page.title();
    console.log(`${browserType.name()}: ${title}`);
  } finally {
    // Close the browser even when a step above throws, so a failed run
    // does not leave it open.
    await browser.close();
  }
}

// Run the same check on every engine, one after another
(async () => {
  for (const browserType of [chromium, firefox, webkit]) {
    await runTest(browserType);
  }
})();

Core API Methods

Browser Management

// Launch options
// devtools and --start-maximized only make sense with a visible window, and
// devtools: true forces headed mode anyway, so headless is set to false here.
const browser = await chromium.launch({
  headless: false,      // Show the browser window (use true to run in the background)
  slowMo: 50,           // Slow down each operation by 50 ms
  devtools: true,       // Open DevTools for each tab (Chromium only)
  args: ['--start-maximized']  // Extra command-line flags passed to the browser
});

// Browser contexts (isolated sessions)
const context = await browser.newContext({
  viewport: { width: 1280, height: 720 },  // Page size in pixels
  userAgent: 'Custom User Agent',          // User-Agent string used by pages in this context
  locale: 'en-US'                          // Locale reported to pages in this context
});

Page Interactions

// Open a new page inside the browser context
const page = await context.newPage();

// Navigation: load a URL, step back and forward through history, then reload
await page.goto('https://example.com');
await page.goBack();
await page.goForward();
await page.reload();

// Element interactions: the first argument is a selector for the target element
await page.click('button');
await page.fill('input[name="email"]', 'user@example.com');
await page.selectOption('select#country', 'US');
await page.check('input[type="checkbox"]');

// Waiting: first for a '.result' element, then for the 'networkidle' load state
await page.waitForSelector('.result');
await page.waitForLoadState('networkidle');

Web Scraping Examples

Extract Text Content

const { chromium } = require('playwright');

// Scrape the Hacker News front page and print each story title with its link.
(async () => {
  const browser = await chromium.launch();
  try {
    const page = await browser.newPage();

    await page.goto('https://news.ycombinator.com');

    // Extract headlines: map every matched anchor to a plain { title, url } object
    const headlines = await page.$$eval('.titleline > a', links =>
      links.map(link => ({
        title: link.textContent.trim(),
        url: link.href
      }))
    );

    console.log(headlines);
  } finally {
    // Close the browser even when navigation or extraction throws
    await browser.close();
  }
})();

Handle Dynamic Content

/**
 * Submits the search form on https://example.com/search with the query
 * "playwright", waits for the results container and extracts each result.
 *
 * @returns {Promise<Array<{title: string, description: string}>>} One entry
 *   per `.result-item` element.
 * @throws Rethrows any navigation, interaction, timeout or extraction error;
 *   the browser is closed before the error propagates.
 */
async function scrapeWithWaiting() {
  const browser = await chromium.launch();
  try {
    const page = await browser.newPage();

    await page.goto('https://example.com/search');

    // Fill search form
    await page.fill('#search-input', 'playwright');
    await page.click('#search-button');

    // Wait for results to load before reading them
    await page.waitForSelector('.search-results');

    // Extract results
    const results = await page.$$eval('.result-item', items =>
      items.map(item => ({
        title: item.querySelector('.title').textContent,
        description: item.querySelector('.description').textContent
      }))
    );

    return results;
  } finally {
    // Always release the browser, including when a step above rejects
    await browser.close();
  }
}

Advanced Features

Screenshots and PDFs

// Full page screenshot (fullPage: true captures the whole scrollable page)
await page.screenshot({
  path: 'fullpage.png',
  fullPage: true
});

// Element screenshot
// page.locator() is synchronous and returns a Locator, so it needs no await
const element = page.locator('.content');
await element.screenshot({ path: 'element.png' });

// Generate PDF (Chromium only)
await page.pdf({
  path: 'page.pdf',
  format: 'A4',
  printBackground: true  // Include background graphics
});

Request Interception

// Block images and stylesheets
await page.route('**/*.{png,jpg,jpeg,gif,css}', route => route.abort());

// Mock API responses: answer matching requests with a canned JSON payload
await page.route('**/api/**', async route => {
  // Await fulfill so its promise is not left floating
  await route.fulfill({
    status: 200,
    contentType: 'application/json',  // Mark the mocked body as JSON
    body: JSON.stringify({ message: 'Mocked response' })
  });
});

Multiple Pages and Contexts

const browser = await chromium.launch();

// Create multiple contexts (isolated sessions)
const userContext = await browser.newContext();
const adminContext = await browser.newContext();

// Multiple pages in same context
const page1 = await userContext.newPage();
const page2 = await userContext.newPage();

// A page in the second context, so the isolated session is actually used
const adminPage = await adminContext.newPage();

await page1.goto('https://example.com/user');
await page2.goto('https://example.com/settings');
await adminPage.goto('https://example.com/admin');

Error Handling

const { chromium } = require('playwright');

/**
 * Loads https://example.com and returns the text of its `.content` element.
 *
 * Failures are logged with console.error and rethrown; the browser is closed
 * in every case where it was launched.
 *
 * @returns {Promise<string|null>} Whatever `page.textContent('.content')` yields.
 * @throws {Error} 'Required element not found' when `.content` is not visible,
 *   or any error raised by the browser calls.
 */
async function robustScraping() {
  let browser;
  try {
    browser = await chromium.launch();
    const page = await browser.newPage();

    // Default timeout for page operations, in milliseconds
    page.setDefaultTimeout(30000);

    const navigationOptions = { waitUntil: 'networkidle' };
    await page.goto('https://example.com', navigationOptions);

    // Only read the text when the target element is visible
    const contentVisible = await page.locator('.content').isVisible();
    if (contentVisible) {
      return await page.textContent('.content');
    }

    throw new Error('Required element not found');
  } catch (err) {
    console.error('Scraping failed:', err.message);
    throw err;
  } finally {
    // browser stays undefined when launch itself failed
    if (browser) {
      await browser.close();
    }
  }
}

Best Practices

  1. Always close browsers: Use try/finally blocks to ensure cleanup
  2. Use contexts: Isolate sessions with browser contexts
  3. Handle timeouts: Set appropriate timeouts for slow pages
  4. Wait for content: Use proper waiting strategies
  5. Respect rate limits: Add delays between requests when scraping

Common Patterns

Page Object Model

/**
 * Page object for the home page: keeps the search locators in one place and
 * exposes the search flow as a single method.
 */
class HomePage {
  /**
   * @param {object} page - Playwright page the locators are created from.
   */
  constructor(page) {
    const searchInput = page.locator('#search');
    const searchButton = page.locator('#search-btn');

    this.page = page;
    this.searchInput = searchInput;
    this.searchButton = searchButton;
  }

  /**
   * Types the query into the search box, clicks the search button and waits
   * for the 'networkidle' load state.
   *
   * @param {string} query - Text to search for.
   * @returns {Promise<void>}
   */
  async search(query) {
    const { page, searchInput, searchButton } = this;

    await searchInput.fill(query);
    await searchButton.click();
    await page.waitForLoadState('networkidle');
  }
}

The Playwright API offers extensive capabilities for browser automation. For the complete API reference and more advanced features, see the official Playwright documentation at https://playwright.dev.

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering, and a built-in HTML parser for web scraping.
Icon