Puppeteer provides powerful methods to interact with DOM elements in headless Chrome. This guide covers all essential DOM manipulation techniques for web scraping and automation.
Selecting DOM Elements
Single Element Selection
Use `page.$()` to select the first matching element (equivalent to `document.querySelector()`):
const puppeteer = require('puppeteer');

// Opens https://example.com and looks up single elements with page.$().
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');

    // Select by tag, class, ID, or CSS selector.
    // Each lookup yields a handle for the first match, or null when nothing matches.
    const titleElement = await page.$('h1');
    const button = await page.$('.submit-btn');
    const form = await page.$('#login-form');
    const link = await page.$('a[href="/about"]');
  } finally {
    // Close the browser even if navigation or a lookup throws,
    // so a failed run does not leave a browser process behind.
    await browser.close();
  }
})();
Multiple Elements Selection
Use `page.$$()` to select all matching elements (equivalent to `document.querySelectorAll()`):
// Every anchor on the page.
const allLinks = await page.$$('a');
// Every <li> nested under a <ul>.
const listItems = await page.$$('ul li');
// Only images that carry a src attribute.
const images = await page.$$('img[src]');

// The result exposes `length`, so the matches can be counted directly.
console.log(`Found ${allLinks.length} links on the page`);
Extracting Content from Elements
Text Content
// Option 1: obtain a handle first, then read its text inside the page.
const titleElement = await page.$('h1');
const titleText = await page.evaluate((heading) => heading.textContent, titleElement);

// Option 2: let $eval locate the element and run the callback in a single call.
const titleDirect = await page.$eval('h1', ({ textContent }) => textContent);

// Option 3: $$eval passes all matching elements to the callback at once.
const allTexts = await page.$$eval('p', (paragraphs) => {
  return paragraphs.map((paragraph) => paragraph.textContent);
});
HTML Content
// Markup found inside the first `.content` element.
const htmlContent = await page.$eval('.content', (node) => node.innerHTML);
// Markup of the first `.container` element, including the element's own tag.
const outerHTML = await page.$eval('.container', (node) => node.outerHTML);
Attribute Values
// Read the href attribute through getAttribute().
const linkUrl = await page.$eval('a', (anchor) => anchor.getAttribute('href'));
// DOM properties can be read directly as well.
const imageAlt = await page.$eval('img', (image) => image.alt);
const inputValue = await page.$eval('input[name="username"]', (field) => field.value);

// Return a plain object to collect several values from one element in a single call.
const linkData = await page.$eval('a', ({ href, textContent, title }) => ({
  href,
  text: textContent,
  title,
}));
Clicking Elements
Basic Clicking
// Resolve a handle first, then click through it.
const button = await page.$('button');
await button.click();

// Or hand the selector straight to page.click() without keeping a handle.
await page.click('.submit-btn');

// An options object as the second argument tunes the click.
await page.click('#submit', {
  button: 'left',
  clickCount: 1,
  delay: 100,
});
Advanced Click Scenarios
// Context-menu click: use the right mouse button.
await page.click('button', { button: 'right' });

// Double-click: request two clicks.
await page.click('button', { clickCount: 2 });

// Click a point given by coordinates rather than a selector.
await page.mouse.click(100, 200);

// Start waiting for the navigation together with the click that triggers it,
// and continue only once both have settled.
await Promise.all([
  page.waitForNavigation(),
  page.click('a[href="/next-page"]'),
]);
Form Interaction
Text Input
// Enter text into a field.
await page.type('input[name="username"]', 'john_doe');

// Replace existing text: the triple-click is intended to select the current
// contents so that the typed text takes their place.
await page.click('input[name="email"]', { clickCount: 3 });
await page.type('input[name="email"]', 'new@email.com');

// Pass a delay option to space out the individual keystrokes.
await page.type('input[type="password"]', 'secret123', { delay: 100 });
Select Dropdowns
// Select by value
await page.select('select[name="country"]', 'us');

// Select multiple options: page.select() takes each value as its own string
// argument (rest parameters), not as a single array.
await page.select('select[name="languages"]', 'en', 'es', 'fr');

// Select by visible text: find the option whose text matches and mark it selected.
await page.evaluate(() => {
  const select = document.querySelector('select[name="country"]');
  const option = Array.from(select.options).find((opt) => opt.text === 'United States');
  if (!option) return;

  option.selected = true;
  // Setting `selected` from script fires no events, so notify any listeners
  // on the page explicitly with bubbling input and change events.
  select.dispatchEvent(new Event('input', { bubbles: true }));
  select.dispatchEvent(new Event('change', { bubbles: true }));
});
Checkboxes and Radio Buttons
// Toggle a checkbox by clicking it.
await page.click('input[type="checkbox"][name="newsletter"]');

// Choose a radio button by clicking the option with the wanted value.
await page.click('input[type="radio"][value="premium"]');

// Read the current checked state of the first checkbox on the page.
const isChecked = await page.$eval('input[type="checkbox"]', (box) => box.checked);
Advanced DOM Manipulation
Setting Attributes and Properties
// Point the first image at a different source by assigning its `src` property.
await page.evaluate(() => {
  const image = document.querySelector('img');
  image.src = 'new-image.jpg';
});

// Update a property and an attribute of the same input in one evaluate call.
await page.evaluate(() => {
  const amountField = document.querySelector('input[name="amount"]');
  amountField.value = '100';
  amountField.setAttribute('max', '500');
});
Waiting for Elements
// Wait for element to appear
await page.waitForSelector('.dynamic-content');

// Wait for element to disappear
await page.waitForSelector('.loading-spinner', { hidden: true });

// Wait with timeout
await page.waitForSelector('.slow-element', { timeout: 10000 });

// Wait for function to return true
await page.waitForFunction(() => {
  const status = document.querySelector('.status');
  // Guard against `.status` not existing yet: dereferencing a null element
  // would throw inside the predicate instead of returning false.
  return status !== null && status.textContent === 'Ready';
});
Handling Dynamic Content
// Click only after the late-loading button is present.
await page.waitForSelector('.ajax-loaded-button');
await page.click('.ajax-loaded-button');

// Bring the footer button into view inside the page, then click it.
await page.evaluate(
  (target) => {
    target.scrollIntoView();
  },
  await page.$('.footer-button'),
);
await page.click('.footer-button');
Error Handling and Best Practices
Safe Element Interaction
/**
 * Waits for `selector` to appear and clicks it, reporting the outcome
 * instead of throwing.
 *
 * @param {object} page - Object exposing `waitForSelector` and `click` (e.g. a Puppeteer page).
 * @param {string} selector - Selector of the element to click.
 * @param {number} [timeout=5000] - Timeout in milliseconds passed to `waitForSelector`.
 * @returns {Promise<boolean>} `true` once the click resolves; `false` when
 *   waiting or clicking throws.
 */
async function safeClick(page, selector, timeout = 5000) {
  try {
    await page.waitForSelector(selector, { timeout });
    await page.click(selector);
    return true;
  } catch (error) {
    // Best-effort by design: log the reason and report failure so a missing
    // optional element does not abort the caller.
    console.log(`Failed to click ${selector}: ${error.message}`);
    return false;
  }
}
// Example: attempt the click and branch on the boolean that safeClick reports.
const clicked = await safeClick(page, '.optional-button');

if (clicked) {
  console.log('Button clicked successfully');
}
Element Existence Check
// page.$() yields null when nothing matches, so compare the awaited result against null.
const elementExists = (await page.$('.optional-element')) !== null;

// Same check performed inside the page, returning a plain boolean.
const elementExists2 = await page.evaluate(() => {
  return document.querySelector('.optional-element') !== null;
});
Complete Example
const puppeteer = require('puppeteer');

// End-to-end run: fill in a form, read some page data, submit, and report the result.
(async () => {
  // headless: false is passed so the run happens in a visible browser window.
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  try {
    await page.goto('https://example.com/form');

    // Form fields: two text inputs, a dropdown, and a checkbox.
    await page.type('#username', 'testuser');
    await page.type('#password', 'testpass123');
    await page.select('#country', 'us');
    await page.click('#newsletter');

    // Collect the heading text and every link's text and href.
    const title = await page.$eval('h1', (heading) => heading.textContent);
    const links = await page.$$eval('a', (anchors) => {
      return anchors.map((anchor) => ({ text: anchor.textContent, href: anchor.href }));
    });
    console.log('Page title:', title);
    console.log('Found links:', links.length);

    // Submit, waiting for the resulting navigation alongside the click.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#submit-btn'),
    ]);

    // Read the confirmation text from the page reached after submitting.
    const successMsg = await page.$eval('.success', (banner) => banner.textContent);
    console.log('Success:', successMsg);
  } catch (error) {
    // Any failed step above lands here and is reported by message.
    console.error('Error:', error.message);
  } finally {
    // Runs on success and on failure, so the browser is always closed.
    await browser.close();
  }
})();
Key Takeaways
- Use `page.$()` for single elements and `page.$$()` for multiple elements
- Combine `page.waitForSelector()` with interactions for dynamic content
- Use `page.evaluate()` for complex DOM operations within the browser context
- Always handle errors and check element existence for robust automation
- Consider using `page.$eval()` and `page.$$eval()` for direct content extraction