Yes, Puppeteer-Sharp can handle multiple pages or tabs simultaneously. As the .NET port of Google's Puppeteer, it fully supports asynchronous operations and provides robust APIs for managing multiple browser contexts, pages, and tabs concurrently.
Basic Multi-Page Example
Here's a simple example showing how to work with multiple pages:
using System;
using System.Threading.Tasks;
using PuppeteerSharp;
class Program
{
    public static async Task Main()
    {
        // Fetch a compatible browser build if one is not already cached locally.
        await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

        // A single browser process hosts all of the tabs created below.
        var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = true
        });

        try
        {
            // Open three independent tabs in the same browser.
            var firstTab = await browser.NewPageAsync();
            var secondTab = await browser.NewPageAsync();
            var thirdTab = await browser.NewPageAsync();

            // Start every navigation before awaiting any of them,
            // so the three page loads overlap.
            await Task.WhenAll(
                firstTab.GoToAsync("https://example.com"),
                secondTab.GoToAsync("https://github.com"),
                thirdTab.GoToAsync("https://stackoverflow.com"));

            // Titles are read concurrently as well.
            var titles = await Task.WhenAll(
                firstTab.GetTitleAsync(),
                secondTab.GetTitleAsync(),
                thirdTab.GetTitleAsync());

            var index = 0;
            foreach (var title in titles)
            {
                index++;
                Console.WriteLine($"Page {index} title: {title}");
            }

            // Shut the tabs down before closing the browser itself.
            await Task.WhenAll(firstTab.CloseAsync(), secondTab.CloseAsync(), thirdTab.CloseAsync());
        }
        finally
        {
            // Guarantee the browser process exits even if something above failed.
            await browser.CloseAsync();
        }
    }
}
Advanced Concurrent Processing
For more complex scenarios, you can process multiple pages with different operations:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using PuppeteerSharp;
public class MultiPageScraper
{
    // Opens one tab per URL, scrapes the title and meta description,
    // and saves a screenshot of each page. Failures on one URL do not
    // stop the others; each tab is always closed in its own finally.
    public static async Task ProcessMultipleUrls(string[] urls)
    {
        var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });
        try
        {
            var scrapeTasks = urls.Select(async target =>
            {
                var tab = await browser.NewPageAsync();
                try
                {
                    // Wait for the network to go idle so dynamically-loaded
                    // content has a chance to settle before scraping.
                    var navOptions = new NavigationOptions
                    {
                        Timeout = 30000,
                        WaitUntil = new[] { WaitUntilNavigation.Networkidle0 }
                    };
                    await tab.GoToAsync(target, navOptions);

                    var pageTitle = await tab.GetTitleAsync();
                    var description = await tab.EvaluateExpressionAsync<string>(
                        "document.querySelector('meta[name=\"description\"]')?.content || ''"
                    );

                    // Ticks-based name keeps concurrent screenshots from colliding.
                    await tab.ScreenshotAsync($"screenshot_{DateTime.Now.Ticks}.png");

                    return new { Url = target, Title = pageTitle, Description = description };
                }
                catch (Exception ex)
                {
                    // Per-URL error isolation: report the failure as a result row.
                    Console.WriteLine($"Error processing {target}: {ex.Message}");
                    return new { Url = target, Title = "Error", Description = ex.Message };
                }
                finally
                {
                    await tab.CloseAsync();
                }
            });

            foreach (var result in await Task.WhenAll(scrapeTasks))
            {
                Console.WriteLine($"URL: {result.Url}");
                Console.WriteLine($"Title: {result.Title}");
                Console.WriteLine($"Description: {result.Description}");
                Console.WriteLine("---");
            }
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
}
Managing Browser Contexts
For better isolation, you can use browser contexts to separate different sessions:
var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });

// Incognito contexts give each session isolated cookies, localStorage, etc.
var sessionA = await browser.CreateIncognitoBrowserContextAsync();
var sessionB = await browser.CreateIncognitoBrowserContextAsync();

// One page per isolated session.
var pageA = await sessionA.NewPageAsync();
var pageB = await sessionB.NewPageAsync();

// Present each session as a different client via its user agent.
await pageA.SetUserAgentAsync("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36");
await pageB.SetUserAgentAsync("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36");

// Both navigations run concurrently.
await Task.WhenAll(
    pageA.GoToAsync("https://httpbin.org/user-agent"),
    pageB.GoToAsync("https://httpbin.org/user-agent")
);
Best Practices for Multi-Page Management
1. Resource Management
// Limit concurrent pages to avoid memory issues
var semaphore = new SemaphoreSlim(5, 5); // Max 5 concurrent pages
var tasks = urls.Select(async url =>
{
    await semaphore.WaitAsync();
    try
    {
        var page = await browser.NewPageAsync();
        try
        {
            // Process page...
        }
        finally
        {
            // Close the tab even if processing throws, so pages never leak.
            await page.CloseAsync();
        }
    }
    finally
    {
        // Always release the slot so waiting URLs are not starved.
        semaphore.Release();
    }
});
// Select is lazily evaluated — nothing runs until the tasks are awaited.
await Task.WhenAll(tasks);
2. Error Handling
// Navigates a fresh tab to the given URL and reports success or failure
// as a PageResult instead of letting exceptions escape, so one bad URL
// cannot take down a batch. The tab is closed on every path.
public static async Task<PageResult> ProcessPageSafely(Browser browser, string url)
{
    Page page = null;
    try
    {
        page = await browser.NewPageAsync();
        await page.GoToAsync(url);
        return new PageResult { Success = true, Title = await page.GetTitleAsync(), Url = url };
    }
    catch (Exception ex)
    {
        // Capture the failure as data rather than rethrowing.
        return new PageResult { Success = false, Error = ex.Message, Url = url };
    }
    finally
    {
        // page stays null if NewPageAsync itself threw.
        if (page is not null)
        {
            await page.CloseAsync();
        }
    }
}
3. Memory and Performance Optimization
// Launch flags that reduce Chrome's resource footprint; commonly needed
// when running headless in containers or CI. Verify each flag against
// your environment before shipping.
var launchOptions = new LaunchOptions
{
    Headless = true,
    Args = new[]
    {
        "--no-sandbox",                                // typically required inside Docker/CI where the sandbox cannot start
        "--disable-dev-shm-usage",                     // avoids crashes when /dev/shm is small (common in containers)
        "--disable-gpu",                               // GPU is unnecessary for headless scraping
        "--disable-features=VizDisplayCompositor"      // NOTE(review): workaround flag — confirm it is still needed for your Chrome version
    }
};
Key Considerations
Concurrency Limits: Chrome has practical limits on concurrent connections. Consider using a semaphore to limit simultaneous pages (typically 5-10).
Memory Usage: Each page consumes significant memory. Monitor RAM usage and implement cleanup strategies.
Context Isolation: Use browser contexts for scenarios requiring different sessions, cookies, or user profiles.
Error Isolation: Failures in one page shouldn't affect others. Always use try-catch blocks for individual page operations.
Resource Cleanup: Always dispose of pages and browser instances properly, using `using` statements or try/finally blocks.
Puppeteer-Sharp's asynchronous nature makes it excellent for handling multiple pages efficiently, enabling powerful web scraping and automation scenarios.