Puppeteer is a powerful Node.js library that provides a high-level API to control headless Chrome or Chromium browsers. It's an excellent tool for SEO auditing because it can analyze web pages exactly as search engines see them, including JavaScript-rendered content.
Installation and Setup
First, install Puppeteer in your project:
npm init -y
npm install puppeteer
Create a comprehensive SEO audit script (seo-audit.js):
const puppeteer = require('puppeteer');
const fs = require('fs');
/**
 * Audits a single web page for common on-page SEO problems using Puppeteer.
 *
 * Findings are plain strings collected in `this.results`, grouped into
 * `issues`, `warnings` and `passed`. `results` always describes the most
 * recent `auditPage()` call.
 */
class SEOAuditor {
  constructor() {
    this.results = this.createEmptyResults('');
  }

  /**
   * Builds an empty results record for one audit run.
   * @param {string} url - URL the record belongs to.
   * @returns {{url: string, timestamp: string, issues: string[], warnings: string[], passed: string[]}}
   */
  createEmptyResults(url) {
    return {
      url,
      timestamp: new Date().toISOString(),
      issues: [],
      warnings: [],
      passed: []
    };
  }

  /**
   * Loads `url` in a fresh headless browser and runs every check against it.
   *
   * Navigation or check failures are recorded as an issue instead of being
   * thrown. Failures to launch the browser or open a tab still reject.
   *
   * @param {string} url - Absolute URL of the page to audit.
   * @returns {Promise<object>} The results record for this run (also `this.results`).
   */
  async auditPage(url) {
    // Start from a fresh record so a reused auditor does not accumulate the
    // findings of earlier pages, and earlier return values stay untouched.
    this.results = this.createEmptyResults(url);

    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
    try {
      const page = await browser.newPage();
      // Desktop-sized viewport; mobile rendering is not covered by this audit.
      await page.setViewport({ width: 1200, height: 800 });
      try {
        const response = await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        // goto() may resolve with null (e.g. same-document navigation), so guard it.
        if (response && response.status() >= 400) {
          this.results.issues.push(`Page returned HTTP status ${response.status()}`);
        }
        await this.checkBasicSEO(page);
        await this.checkTechnicalSEO(page);
        await this.checkContentSEO(page);
        await this.checkPerformance(page);
      } catch (error) {
        this.results.issues.push(`Failed to load page: ${error.message}`);
      }
    } finally {
      // Always release the browser, even when opening the tab fails.
      await browser.close();
    }
    return this.results;
  }

  /**
   * Checks the title tag, meta description and canonical link.
   * @param {object} page - Puppeteer page that has already been navigated.
   * @returns {Promise<void>}
   */
  async checkBasicSEO(page) {
    // Title tag
    const title = await page.title();
    if (!title) {
      this.results.issues.push('Missing title tag');
    } else if (title.length < 30) {
      this.results.warnings.push(`Title too short: ${title.length} characters`);
    } else if (title.length > 60) {
      this.results.warnings.push(`Title too long: ${title.length} characters`);
    } else {
      this.results.passed.push(`Title length optimal: ${title.length} characters`);
    }

    // Meta description. $eval rejects when nothing matches the selector;
    // that case is mapped to null and reported as "missing".
    const metaDescription = await page.$eval(
      'meta[name="description"]',
      el => el.content
    ).catch(() => null);
    if (!metaDescription) {
      this.results.issues.push('Missing meta description');
    } else if (metaDescription.length < 120) {
      this.results.warnings.push(`Meta description too short: ${metaDescription.length} characters`);
    } else if (metaDescription.length > 160) {
      this.results.warnings.push(`Meta description too long: ${metaDescription.length} characters`);
    } else {
      this.results.passed.push(`Meta description length optimal: ${metaDescription.length} characters`);
    }

    // Canonical URL
    const canonical = await page.$eval(
      'link[rel="canonical"]',
      el => el.href
    ).catch(() => null);
    if (!canonical) {
      this.results.warnings.push('Missing canonical URL');
    } else {
      this.results.passed.push('Canonical URL present');
    }
  }

  /**
   * Checks H1 usage, heading hierarchy, image alt text, the robots meta tag
   * and the presence of JSON-LD structured data.
   * @param {object} page - Puppeteer page that has already been navigated.
   * @returns {Promise<void>}
   */
  async checkTechnicalSEO(page) {
    // One query for all headings, in document order; the H1 count and the
    // hierarchy check are both derived from it.
    const headings = await page.$$eval('h1, h2, h3, h4, h5, h6', elements =>
      elements.map(el => ({
        tag: el.tagName.toLowerCase(),
        text: el.innerText.trim()
      }))
    );

    const h1Count = headings.filter(heading => heading.tag === 'h1').length;
    if (h1Count === 0) {
      this.results.issues.push('No H1 tag found');
    } else if (h1Count > 1) {
      this.results.warnings.push(`Multiple H1 tags found: ${h1Count}`);
    } else {
      this.results.passed.push('Single H1 tag present');
    }

    // Heading hierarchy: a heading may be at most one level deeper than the
    // heading before it (h2 -> h3 is fine, h2 -> h4 skips a level).
    const skippedLevels = [];
    let previousLevel = 0;
    for (const { tag } of headings) {
      const level = Number(tag.slice(1));
      if (previousLevel > 0 && level > previousLevel + 1) {
        skippedLevels.push(`h${previousLevel} -> ${tag}`);
      }
      previousLevel = level;
    }
    if (skippedLevels.length > 0) {
      this.results.warnings.push(`Heading levels skipped: ${skippedLevels.join(', ')}`);
    } else if (headings.length > 0) {
      this.results.passed.push('Heading hierarchy is sequential');
    }

    // Images without alt text (an empty alt attribute counts as missing)
    const imagesWithoutAlt = await page.$$eval('img', images =>
      images.filter(img => !img.alt || img.alt.trim() === '').length
    );
    if (imagesWithoutAlt > 0) {
      this.results.warnings.push(`${imagesWithoutAlt} images missing alt text`);
    } else {
      this.results.passed.push('All images have alt text');
    }

    // Robots meta tag; directive names are case-insensitive.
    const robotsMeta = await page.$eval(
      'meta[name="robots"]',
      el => el.content
    ).catch(() => null);
    if (robotsMeta && robotsMeta.toLowerCase().includes('noindex')) {
      this.results.warnings.push('Page has noindex directive');
    }

    // Structured data (JSON-LD blocks only)
    const structuredData = await page.$$eval(
      'script[type="application/ld+json"]',
      scripts => scripts.length
    );
    if (structuredData === 0) {
      this.results.warnings.push('No structured data found');
    } else {
      this.results.passed.push(`${structuredData} structured data blocks found`);
    }
  }

  /**
   * Checks the visible word count and counts internal vs. external links.
   * @param {object} page - Puppeteer page that has already been navigated.
   * @returns {Promise<void>}
   */
  async checkContentSEO(page) {
    // Word count. The callback runs inside the page, so it must be
    // self-contained and cannot reference variables from this scope.
    const wordCount = await page.evaluate(() => {
      const body = document.body;
      const text = body ? (body.innerText || body.textContent || '') : '';
      // ''.split(/\s+/) yields [''], so drop empty tokens to get 0 for an empty page.
      return text.trim().split(/\s+/).filter(Boolean).length;
    });
    if (wordCount < 300) {
      this.results.warnings.push(`Low word count: ${wordCount} words`);
    } else {
      this.results.passed.push(`Good word count: ${wordCount} words`);
    }

    // Internal and external links. A link is internal when its hostname is the
    // page's hostname or a subdomain of it. Comparing hostnames (rather than
    // searching the whole href) keeps e.g. https://other.site/?ref=<our host>
    // external. Links without a hostname (mailto:, tel:, javascript:) count
    // as external.
    const links = await page.$$eval('a[href]', anchors => {
      const pageHost = window.location.hostname;
      return anchors.map(a => {
        const linkHost = a.hostname;
        return {
          href: a.href,
          text: a.innerText.trim(),
          internal: linkHost !== '' &&
            (linkHost === pageHost || linkHost.endsWith(`.${pageHost}`))
        };
      });
    });
    const internalLinks = links.filter(link => link.internal).length;
    const externalLinks = links.length - internalLinks;
    this.results.passed.push(`Internal links: ${internalLinks}, External links: ${externalLinks}`);
  }

  /**
   * Reports Largest Contentful Paint and total page load time.
   *
   * First Input Delay is not measured: it needs a real user interaction,
   * which a scripted page load does not produce.
   *
   * @param {object} page - Puppeteer page that has already been navigated.
   * @returns {Promise<void>}
   */
  async checkPerformance(page) {
    // The page has already loaded, so the observer must ask for buffered
    // entries; otherwise it only sees entries produced after it was created.
    const lcp = await page.evaluate(() => {
      return new Promise((resolve) => {
        let observer = null;
        let timer = null;
        const finish = (value) => {
          if (observer) observer.disconnect();
          if (timer !== null) clearTimeout(timer);
          resolve(value);
        };
        try {
          observer = new PerformanceObserver((list) => {
            const entries = list.getEntries();
            // The last entry is the most recent LCP candidate.
            finish(entries.length > 0 ? entries[entries.length - 1].startTime : null);
          });
          observer.observe({ type: 'largest-contentful-paint', buffered: true });
        } catch (error) {
          // Best effort: if the observer cannot be set up, report "not measured".
          finish(null);
          return;
        }
        // Fallback timeout in case no LCP entry is ever delivered.
        timer = setTimeout(() => finish(null), 5000);
      });
    });
    if (lcp === null) {
      this.results.warnings.push('Largest Contentful Paint could not be measured');
    } else if (lcp > 2500) {
      this.results.warnings.push(`Slow Largest Contentful Paint: ${Math.round(lcp)}ms`);
    } else {
      this.results.passed.push(`Good Largest Contentful Paint: ${Math.round(lcp)}ms`);
    }

    // Page load time. Prefer the Navigation Timing entry and fall back to the
    // legacy performance.timing. loadEventEnd is 0 until the load event has
    // finished, which would otherwise produce a negative duration.
    const loadTime = await page.evaluate(() => {
      const navigation = performance.getEntriesByType('navigation')[0];
      if (navigation && navigation.loadEventEnd > 0) {
        return Math.round(navigation.loadEventEnd - navigation.startTime);
      }
      const timing = performance.timing;
      if (timing && timing.loadEventEnd > 0) {
        return timing.loadEventEnd - timing.navigationStart;
      }
      return null;
    });
    if (loadTime === null) {
      this.results.warnings.push('Page load time unavailable: load event has not completed');
    } else if (loadTime > 3000) {
      this.results.warnings.push(`Slow page load time: ${loadTime}ms`);
    } else {
      this.results.passed.push(`Good page load time: ${loadTime}ms`);
    }
  }

  /**
   * Serializes the most recent results plus a summary of the counts.
   * @returns {string} Pretty-printed JSON report.
   */
  generateReport() {
    const report = {
      ...this.results,
      summary: {
        totalIssues: this.results.issues.length,
        totalWarnings: this.results.warnings.length,
        totalPassed: this.results.passed.length
      }
    };
    return JSON.stringify(report, null, 2);
  }
}
// Usage
/**
 * Command-line driver: audits the URL given as the first CLI argument
 * (defaulting to https://example.com), writes the JSON report to a
 * timestamped file in the current directory and prints a summary.
 *
 * @returns {Promise<void>}
 */
async function runAudit() {
  const seoAuditor = new SEOAuditor();
  const [, , cliUrl] = process.argv;
  const targetUrl = cliUrl || 'https://example.com';
  console.log(`Starting SEO audit for: ${targetUrl}`);

  const findings = await seoAuditor.auditPage(targetUrl);
  const reportJson = seoAuditor.generateReport();

  // Persist the full report; the console only gets the summary below.
  const reportFile = `seo-audit-${Date.now()}.json`;
  fs.writeFileSync(reportFile, reportJson);

  // Prints a heading followed by one bullet per entry; silent for empty lists.
  const printSection = (heading, entries) => {
    if (entries.length === 0) {
      return;
    }
    console.log(heading);
    for (const entry of entries) {
      console.log(` - ${entry}`);
    }
  };

  const { issues, warnings, passed } = findings;
  console.log('\n=== SEO AUDIT RESULTS ===');
  console.log(`Issues: ${issues.length}`);
  console.log(`Warnings: ${warnings.length}`);
  console.log(`Passed: ${passed.length}`);
  printSection('\n🔴 Issues:', issues);
  printSection('\n🟡 Warnings:', warnings);
}
// Run the audit only when this file is executed directly
// (`node seo-audit.js <url>`), not when it is loaded via require().
if (require.main === module) {
  runAudit().catch((error) => {
    console.error(error);
    // A handled rejection would otherwise let the process exit with code 0,
    // hiding the failure from shells and CI pipelines.
    process.exitCode = 1;
  });
}
// Expose the auditor class for programmatic use.
module.exports = SEOAuditor;
Running the SEO Audit
Execute the audit script with a target URL:
node seo-audit.js https://yourwebsite.com
Advanced SEO Checks
For more comprehensive auditing, you can extend the script with additional checks:
// Check for mobile-friendliness
/**
 * Checks basic mobile-friendliness of an already-loaded page.
 *
 * Side effect: the page is left at a 375x667 viewport.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @returns {Promise<{hasViewportMeta: boolean, hasHorizontalScroll: boolean}>}
 *   `hasViewportMeta` - whether a `<meta name="viewport">` element exists.
 *   `hasHorizontalScroll` - whether the document is wider than the viewport.
 */
async function checkMobileOptimization(page) {
  await page.setViewport({ width: 375, height: 667 }); // Mobile viewport
  const hasViewportMeta = (await page.$('meta[name="viewport"]')) !== null;
  // Measure the document itself rather than every element: an inner scroll
  // container (carousel, scrollable <pre>, wide table wrapper) has a large
  // scrollWidth of its own without making the page scroll sideways.
  const hasHorizontalScroll = await page.evaluate(
    () => document.documentElement.scrollWidth > window.innerWidth
  );
  return { hasViewportMeta, hasHorizontalScroll };
}
// Check for broken links
/**
 * Visits every http(s) link found on the page and reports the ones that fail.
 *
 * Links are checked in a separate tab so the audited page is not navigated
 * away. Each distinct URL is requested once; fragments are ignored.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @returns {Promise<Array<{url: string, status?: number, error?: string}>>}
 *   One entry per broken link: `status` for HTTP responses >= 400, `error`
 *   when the navigation itself failed (timeout, DNS failure, ...).
 */
async function checkBrokenLinks(page) {
  const links = await page.$$eval('a[href]', anchors =>
    anchors.map(a => a.href).filter(href => href.startsWith('http'))
  );
  // The fragment is never sent to the server, so "#section" variants of one
  // URL are the same request; the Set also removes plain duplicates.
  const targets = [...new Set(links.map(link => link.split('#')[0]))];

  const brokenLinks = [];
  // Scratch tab: navigating `page` itself would discard the audited document.
  const checker = await page.browser().newPage();
  try {
    for (const link of targets) {
      try {
        const response = await checker.goto(link, { timeout: 5000 });
        // goto() can resolve with null; treat that as "no error status seen".
        const status = response ? response.status() : null;
        if (status !== null && status >= 400) {
          brokenLinks.push({ url: link, status });
        }
      } catch (error) {
        brokenLinks.push({ url: link, error: error.message });
      }
    }
  } finally {
    await checker.close();
  }
  return brokenLinks;
}
Batch Auditing Multiple Pages
For auditing multiple pages or entire websites:
/**
 * Audits the pages listed in an XML sitemap (first 10 URLs only).
 *
 * @param {string} sitemapUrl - URL of the sitemap; its `<url><loc>` entries
 *   are the pages to audit.
 * @returns {Promise<object[]>} One independent results record per audited
 *   URL, in sitemap order.
 */
async function auditSitemap(sitemapUrl) {
  // This browser is only needed to read the sitemap; every auditPage() call
  // launches its own, so close this one before the audits start.
  const browser = await puppeteer.launch();
  let urls;
  try {
    const page = await browser.newPage();
    // Fetch sitemap
    await page.goto(sitemapUrl);
    urls = await page.$$eval('url > loc', locs =>
      // <loc> values are often wrapped in whitespace/newlines.
      locs.map(loc => loc.textContent.trim()).filter(Boolean)
    );
  } finally {
    // Close the browser even when the sitemap cannot be loaded or parsed.
    await browser.close();
  }

  const results = [];
  for (const url of urls.slice(0, 10)) { // Limit for demo
    console.log(`Auditing: ${url}`);
    // One auditor per page, so each result is its own record rather than a
    // shared object that keeps growing.
    const auditor = new SEOAuditor();
    results.push(await auditor.auditPage(url));
  }
  return results;
}
Key SEO Elements Checked
This comprehensive audit script checks:
- Meta tags: Title, description, canonical, robots
- Content structure: Headings hierarchy, word count
- Images: Alt text presence (the script does not assess alt text quality)
- Links: Internal/external link analysis
- Performance: Page load time, Core Web Vitals
- Technical SEO: Structured data, mobile optimization
- Accessibility: Basic accessibility checks (currently limited to image alt text)
Best Practices for SEO Auditing
- Run audits regularly: Schedule automated audits to catch issues early
- Test on different devices: Use various viewport sizes
- Check after deployments: Ensure changes don't break SEO
- Monitor performance: Track Core Web Vitals over time
- Save historical data: Compare audit results across time
This comprehensive approach to SEO auditing with Puppeteer provides deep insights into your website's search engine optimization status and helps identify areas for improvement.