How do I scrape data from mobile-optimized websites using JavaScript?
Mobile-optimized websites often serve different content, layouts, and functionality compared to their desktop counterparts. When scraping these sites, you need to emulate mobile devices to access the mobile-specific content and ensure your scrapers work effectively. This guide covers comprehensive techniques for scraping mobile-optimized websites using JavaScript.
Understanding Mobile Website Differences
Mobile websites typically differ from desktop versions in several ways:
- Responsive design: Same content with different layouts
- Separate mobile URLs: Different domains (m.example.com) or paths (/mobile)
- Progressive Web Apps (PWAs): Enhanced mobile experiences
- Touch-optimized interfaces: Different interaction patterns
- Mobile-specific content: Features only available on mobile devices
- Performance optimizations: Reduced images, lazy loading, simplified navigation
Device Emulation with Puppeteer
Puppeteer provides excellent mobile device emulation capabilities. Here's how to set up mobile scraping:
Basic Mobile Device Emulation
const puppeteer = require('puppeteer');
/**
 * Opens https://example.com in a visible browser while emulating an
 * iPhone 12 Pro, then logs the page title together with the text of the
 * `.mobile-menu` and `.mobile-content` elements (undefined when absent).
 *
 * The browser is closed even when navigation or extraction fails.
 *
 * @returns {Promise<void>} Resolves after the data is logged and the browser is closed.
 */
async function scrapeMobileWebsite() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch({ headless: false });

  try {
    const page = await browser.newPage();

    // Emulate iPhone 12 Pro
    await page.emulate(deviceCatalog['iPhone 12 Pro']);
    await page.goto('https://example.com');

    // Extract mobile-specific content
    const data = await page.evaluate(() => ({
      title: document.title,
      mobileMenu: document.querySelector('.mobile-menu')?.textContent,
      content: document.querySelector('.mobile-content')?.textContent,
    }));

    console.log(data);
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
// Run the example. Report a failure explicitly instead of leaving the
// returned promise's rejection unhandled, and keep a failing exit status.
scrapeMobileWebsite().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Custom Mobile Viewport Configuration
/**
 * Scrapes article teasers from https://example.com using a hand-configured
 * mobile viewport (375x812, touch enabled, scale factor 3) and an iPhone
 * Safari user agent instead of a predefined device profile.
 *
 * The browser is closed even when navigation or the selector wait fails.
 *
 * @returns {Promise<Array<{title: (string|undefined), summary: (string|undefined), url: (string|undefined)}>>}
 *   One entry per `.mobile-article` element found on the page.
 * @throws Rejects if `.mobile-navigation` does not appear within 5 seconds.
 */
async function customMobileEmulation() {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Custom mobile viewport
    await page.setViewport({
      width: 375,
      height: 812,
      isMobile: true,
      hasTouch: true,
      deviceScaleFactor: 3,
    });

    // Set mobile user agent
    await page.setUserAgent(
      'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1'
    );

    await page.goto('https://example.com');

    // Wait for mobile-specific elements to load
    await page.waitForSelector('.mobile-navigation', { timeout: 5000 });

    return await page.evaluate(() =>
      Array.from(document.querySelectorAll('.mobile-article'), (article) => ({
        title: article.querySelector('h2')?.textContent.trim(),
        summary: article.querySelector('.summary')?.textContent.trim(),
        url: article.querySelector('a')?.href,
      }))
    );
  } finally {
    // Always release the browser process, including on timeouts.
    await browser.close();
  }
}
Advanced Mobile Scraping Techniques
Handling Touch Interactions
Mobile websites often require touch interactions. Here's how to simulate them:
/**
 * Demonstrates touch input on an emulated iPhone 12: taps `.mobile-button`,
 * taps the centre of `.swipeable-content`, scrolls down by one viewport
 * height, pauses one second, and returns the text of `.dynamic-content`.
 *
 * The browser is closed even when an interaction fails.
 *
 * @returns {Promise<string>} Text content of the `.dynamic-content` element.
 * @throws {Error} If `.swipeable-content` is missing or has no bounding box.
 */
async function handleTouchInteractions() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    await page.emulate(deviceCatalog['iPhone 12']);
    await page.goto('https://mobile-app.example.com');

    // Simulate touch tap
    await page.tap('.mobile-button');

    // Tap the centre of the swipeable region. Note this is a single tap at
    // computed coordinates, not a drag/swipe gesture.
    const swipeTarget = await page.$('.swipeable-content');
    const box = await swipeTarget?.boundingBox();
    if (!box) {
      // Fail with a clear message rather than a TypeError on null.
      throw new Error('Element ".swipeable-content" is missing or not visible');
    }
    await page.touchscreen.tap(box.x + box.width / 2, box.y + box.height / 2);

    // Simulate scroll
    await page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });

    // Fixed pause for content triggered by the scroll. A plain timer is used
    // because `page.waitForTimeout` is not available in current Puppeteer.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    return await page.$eval('.dynamic-content', (el) => el.textContent);
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Mobile-Specific Navigation Patterns
Mobile sites often use hamburger menus and different navigation patterns:
/**
 * Opens the hamburger menu of https://example.com on an emulated iPhone 12,
 * collects the menu links, follows the first link whose href contains
 * "products", and scrapes the product cards on the destination page.
 *
 * The browser is closed even when a step fails.
 *
 * @returns {Promise<{menuItems: Array<{text: string, url: string}>,
 *   products: Array<{name: (string|undefined), price: (string|undefined), image: (string|undefined)}>}>}
 */
async function navigateMobileMenu() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch({ headless: false });

  try {
    const page = await browser.newPage();
    await page.emulate(deviceCatalog['iPhone 12']);
    await page.goto('https://example.com');

    // Open mobile hamburger menu
    await page.click('.hamburger-menu');
    await page.waitForSelector('.mobile-nav-menu', { visible: true });

    // Extract menu items
    const menuItems = await page.evaluate(() =>
      Array.from(document.querySelectorAll('.mobile-nav-menu a'), (link) => ({
        text: link.textContent.trim(),
        url: link.href,
      }))
    );

    // Navigate to specific section. The navigation wait must be registered
    // before the click is issued; awaiting the click first can miss a fast
    // navigation and leave waitForNavigation hanging until it times out.
    await Promise.all([
      page.waitForNavigation(),
      page.click('.mobile-nav-menu a[href*="products"]'),
    ]);

    // Extract mobile product data
    const products = await page.evaluate(() =>
      Array.from(document.querySelectorAll('.product-card'), (card) => ({
        name: card.querySelector('.product-name')?.textContent,
        price: card.querySelector('.product-price')?.textContent,
        image: card.querySelector('img')?.src,
      }))
    );

    return { menuItems, products };
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Using Playwright for Mobile Scraping
Playwright offers similar capabilities with a slightly different API:
const { chromium, devices } = require('playwright');
/**
 * Scrapes `.mobile-item` entries from https://mobile.example.com with
 * Playwright, using the iPhone 12 device profile plus a fixed geolocation
 * (San Francisco coordinates) with the geolocation permission granted.
 *
 * The browser is closed even when navigation or extraction fails.
 *
 * @returns {Promise<Array<{title: string, description: string, price: string}>|{error: string}>}
 *   The extracted items, or `{ error }` when the in-page extraction throws.
 */
async function playwrightMobileScraping() {
  const browser = await chromium.launch();

  try {
    const context = await browser.newContext({
      ...devices['iPhone 12'],
      geolocation: { longitude: -122.4194, latitude: 37.7749 },
      permissions: ['geolocation'],
    });
    const page = await context.newPage();

    await page.goto('https://mobile.example.com');

    // Handle mobile-specific loading
    await page.waitForLoadState('networkidle');

    // Extract data with error handling. Errors raised inside the page are
    // reported as a value rather than as a rejection.
    return await page.evaluate(() => {
      const textOf = (root, selector) =>
        root.querySelector(selector)?.textContent?.trim() || '';
      try {
        return Array.from(document.querySelectorAll('.mobile-item'), (item) => ({
          title: textOf(item, 'h3'),
          description: textOf(item, '.description'),
          price: textOf(item, '.price'),
        }));
      } catch (error) {
        return { error: error.message };
      }
    });
  } finally {
    // Closing the browser also disposes of the context created above.
    await browser.close();
  }
}
Handling Mobile-Specific Challenges
Lazy Loading and Infinite Scroll
Mobile sites frequently use lazy loading and infinite scroll:
/**
 * Collects every `.scroll-item` from https://infinite-scroll.example.com on
 * an emulated iPhone 12 by repeatedly scrolling to the bottom of the page
 * until a scroll yields no new items.
 *
 * Each round reads the new items in a single in-page call, starting at the
 * number of items already collected. Counting and extracting in separate
 * calls could collect the same item twice if more items loaded in between.
 *
 * The browser is closed even when a step fails.
 *
 * @param {number} [maxScrolls=Infinity] Upper bound on scroll rounds, useful
 *   for feeds that never end. At least one round is always performed.
 * @returns {Promise<Array<{title: (string|undefined), content: (string|undefined)}>>}
 *   All collected items in page order.
 */
async function handleMobileInfiniteScroll(maxScrolls = Infinity) {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    await page.emulate(deviceCatalog['iPhone 12']);
    await page.goto('https://infinite-scroll.example.com');

    const allItems = [];
    let scrollsDone = 0;
    let newItems;

    do {
      // Scroll to bottom
      await page.evaluate(() => {
        window.scrollTo(0, document.body.scrollHeight);
      });

      // Wait for new content to load. A plain timer is used because
      // `page.waitForTimeout` is not available in current Puppeteer.
      await new Promise((resolve) => setTimeout(resolve, 2000));

      // Extract everything past what has already been collected.
      newItems = await page.evaluate(
        (startIndex) =>
          Array.from(document.querySelectorAll('.scroll-item'))
            .slice(startIndex)
            .map((item) => ({
              title: item.querySelector('h3')?.textContent,
              content: item.querySelector('.content')?.textContent,
            })),
        allItems.length
      );

      allItems.push(...newItems);
      scrollsDone += 1;
    } while (newItems.length > 0 && scrollsDone < maxScrolls);

    return allItems;
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Progressive Web App (PWA) Handling
Some mobile sites are PWAs that require special handling:
/**
 * Scrapes https://pwa.example.com on an emulated iPhone 12: waits for the
 * service worker registration to become ready, performs a client-side
 * navigation to `/mobile-section` through the History API, pauses one
 * second, and reads the resulting state.
 *
 * The browser is closed even when a step fails.
 *
 * @returns {Promise<{appData: Object, content: (string|undefined), isServiceWorkerActive: boolean}>}
 *   `appData` is `window.appData` (or `{}`), `content` is the text of
 *   `.pwa-content`, and `isServiceWorkerActive` tells whether a service
 *   worker currently controls the page.
 */
async function scrapePWA() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    await page.emulate(deviceCatalog['iPhone 12']);

    // Explicitly turn offline emulation off so the page loads over the
    // network. This call does not itself enable service workers.
    await page.setOfflineMode(false);

    await page.goto('https://pwa.example.com', {
      waitUntil: 'networkidle0',
    });

    // Wait for PWA to fully initialize. Pages without the Service Worker API
    // pass straight through instead of throwing on an undefined property.
    await page.waitForFunction(
      () =>
        !('serviceWorker' in navigator) ||
        navigator.serviceWorker.ready.then(() => true)
    );

    // Handle PWA-specific navigation: change the URL without a reload and
    // notify the app's router through a synthetic popstate event.
    await page.evaluate(() => {
      if (window.history?.pushState) {
        window.history.pushState({}, '', '/mobile-section');
        window.dispatchEvent(new PopStateEvent('popstate'));
      }
    });

    // Fixed pause for the client-side route to render. A plain timer is used
    // because `page.waitForTimeout` is not available in current Puppeteer.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    return await page.evaluate(() => ({
      appData: window.appData || {},
      content: document.querySelector('.pwa-content')?.textContent,
      isServiceWorkerActive: Boolean(navigator.serviceWorker?.controller),
    }));
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Best Practices for Mobile Web Scraping
Network Optimization
Mobile networks can be slower, so optimize your scraping:
/**
 * Scrapes a trimmed-down snapshot of https://example.com on an emulated
 * iPhone 12 under throttled network conditions, while blocking image,
 * stylesheet and font requests to reduce transfer volume.
 *
 * The browser is closed even when a step fails.
 *
 * @returns {Promise<{title: string, mainContent: (string|undefined), links: string[]}>}
 *   The document title, the first 500 characters of `<main>`, and the hrefs
 *   of the first 10 anchors.
 */
async function optimizedMobileScraping() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const blockedResourceTypes = new Set(['image', 'stylesheet', 'font']);
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Emulate slower mobile network (throughput values are bytes per second)
    await page.emulateNetworkConditions({
      offline: false,
      downloadThroughput: (1.5 * 1024 * 1024) / 8, // 1.5 Mbps
      uploadThroughput: (750 * 1024) / 8, // 750 Kbps
      latency: 40, // 40ms latency
    });

    await page.emulate(deviceCatalog['iPhone 12']);

    // Block unnecessary resources
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Skip requests another handler already resolved; resolving a request
      // twice is an error. Optional call keeps older Puppeteer working.
      if (req.isInterceptResolutionHandled?.()) {
        return;
      }
      if (blockedResourceTypes.has(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://example.com');

    // Extract essential data only
    return await page.evaluate(() => ({
      title: document.title,
      mainContent: document.querySelector('main')?.textContent?.substring(0, 500),
      links: Array.from(document.querySelectorAll('a'))
        .slice(0, 10)
        .map((a) => a.href),
    }));
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Error Handling and Retries
Mobile environments can be unpredictable:
/**
 * Scrapes the text of `.content` from `url` on an emulated iPhone 12,
 * retrying with a fresh browser on failure. The pause between attempts
 * grows linearly (1s after the first failure, 2s after the second, ...).
 *
 * Each attempt's browser is closed in a `finally` block, so cleanup happens
 * exactly once per attempt on both the success and the failure path.
 *
 * @param {string} url Page to scrape.
 * @param {number} [retries=3] Maximum number of attempts. With a value below
 *   1 no attempt is made and the promise resolves to `undefined`.
 * @returns {Promise<{success: boolean, data: (string|undefined)}|undefined>}
 * @throws {Error} After the final failed attempt; the message is
 *   "Failed after N attempts: <reason>" and `cause` holds the last error.
 */
async function robustMobileScraping(url, retries = 3) {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;

  for (let attempt = 1; attempt <= retries; attempt++) {
    const browser = await puppeteer.launch();

    try {
      const page = await browser.newPage();
      await page.emulate(deviceCatalog['iPhone 12']);

      // Set longer timeouts for mobile
      page.setDefaultTimeout(30000);
      page.setDefaultNavigationTimeout(30000);

      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: 30000,
      });

      return await page.evaluate(() => ({
        // Your scraping logic here
        success: true,
        data: document.querySelector('.content')?.textContent,
      }));
    } catch (error) {
      if (attempt === retries) {
        // Keep the original error reachable for callers that inspect it.
        throw new Error(`Failed after ${retries} attempts: ${error.message}`, {
          cause: error,
        });
      }
    } finally {
      await browser.close();
    }

    // Wait before retry
    await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
  }
}
Monitoring and Debugging Mobile Scraping
When debugging mobile scraping issues, knowing how to set the viewport in Puppeteer can help you understand viewport-related problems:
/**
 * Debugging aid: opens https://example.com in a visible browser with
 * DevTools on an emulated iPhone 12, mirrors page console messages and
 * uncaught page errors to the Node console, saves a full-page screenshot to
 * `mobile-debug.png`, and logs the effective viewport and user agent.
 *
 * The browser is closed even when a step fails, so a failed debugging run
 * does not leave a headful browser window behind.
 *
 * @returns {Promise<void>}
 */
async function debugMobileScraping() {
  // Newer Puppeteer releases expose the device table as `KnownDevices`;
  // older ones expose it as `devices`. Accept either.
  const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
  const browser = await puppeteer.launch({
    headless: false,
    devtools: true,
  });

  try {
    const page = await browser.newPage();
    await page.emulate(deviceCatalog['iPhone 12']);

    // Enable console logging
    page.on('console', (msg) => console.log('PAGE LOG:', msg.text()));
    page.on('pageerror', (error) => console.log('PAGE ERROR:', error.message));

    await page.goto('https://example.com');

    // Take screenshot for debugging
    await page.screenshot({
      path: 'mobile-debug.png',
      fullPage: true,
    });

    // Log viewport information
    const viewportInfo = await page.evaluate(() => ({
      innerWidth: window.innerWidth,
      innerHeight: window.innerHeight,
      userAgent: navigator.userAgent,
      isMobile: /Mobi|Android/i.test(navigator.userAgent),
    }));

    console.log('Viewport Info:', viewportInfo);
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
Integration with WebScraping.AI
For production mobile web scraping, consider using specialized APIs that handle mobile emulation automatically. This approach can simplify your code and improve reliability when dealing with complex mobile sites that require advanced capabilities, such as handling AJAX requests with Puppeteer.
Conclusion
Scraping mobile-optimized websites requires careful consideration of device emulation, touch interactions, network conditions, and mobile-specific UI patterns. By using tools like Puppeteer and Playwright with proper mobile device emulation, you can effectively extract data from mobile sites while handling the unique challenges they present.
Remember to respect robots.txt files, implement appropriate delays, and consider the legal implications of your scraping activities. Mobile sites often have different rate limiting and anti-bot measures, so always scrape responsibly and ethically.