Handling file downloads with Puppeteer-Sharp requires configuring the browser's download behavior and implementing proper download monitoring. This guide covers everything from basic setup to production-ready download handling.
Installation
First, install Puppeteer-Sharp in your .NET project:
dotnet add package PuppeteerSharp
Basic Download Setup
1. Configure Browser for Downloads
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using PuppeteerSharp;
/// <summary>
/// Downloads a file by driving a headless Chromium instance through Puppeteer-Sharp.
/// </summary>
public class DownloadHandler
{
    /// <summary>
    /// Navigates to <paramref name="url"/>, clicks the element matching
    /// <paramref name="selector"/> and waits for the resulting file to appear in
    /// <paramref name="downloadDirectory"/>.
    /// </summary>
    /// <param name="url">Page that contains the download trigger.</param>
    /// <param name="selector">CSS selector of the element to click.</param>
    /// <param name="downloadDirectory">Directory that receives the file; created if missing.</param>
    /// <returns>The path reported by <c>WaitForDownloadAsync</c>.</returns>
    public static async Task<string> DownloadFileAsync(string url, string selector, string downloadDirectory)
    {
        // Ensure download directory exists
        Directory.CreateDirectory(downloadDirectory);

        // Download Chromium if needed
        await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

        // NOTE(review): --disable-web-security turns off the same-origin policy and is
        // not required for downloads; confirm it is really needed before shipping.
        var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = true,
            Args = new[] { "--disable-web-security", "--disable-features=VizDisplayCompositor" }
        });

        // Everything after the launch lives inside the try so the browser process is
        // closed even when page creation or the CDP call fails.
        try
        {
            var page = await browser.NewPageAsync();

            // Configure download behavior before navigating
            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = downloadDirectory
            });

            // Navigate and trigger download
            await page.GoToAsync(url);
            await page.ClickAsync(selector);

            // Wait for download to complete
            return await WaitForDownloadAsync(downloadDirectory);
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
}
2. Monitor Download Completion
Instead of waiting a fixed amount of time with Task.Delay(), poll the download directory until a completed file appears:
/// <summary>
/// Polls <paramref name="downloadPath"/> every 500 ms until it contains a file that is
/// not a partial download (<c>.crdownload</c> / <c>.tmp</c>), and returns the most
/// recently created such file.
/// </summary>
/// <param name="downloadPath">Directory to watch. It must already exist.</param>
/// <param name="timeoutMs">Maximum time to wait, in milliseconds.</param>
/// <param name="existingFiles">
/// Optional snapshot of <c>Directory.GetFiles(downloadPath)</c> taken before the download
/// was triggered. Files in the snapshot are ignored, so a file left over from an earlier
/// run is not mistaken for the new download. When omitted, every file is a candidate.
/// </param>
/// <returns>Full path of the newest completed file.</returns>
/// <exception cref="TimeoutException">No completed file appeared within the timeout.</exception>
private static async Task<string> WaitForDownloadAsync(string downloadPath, int timeoutMs = 30000, IEnumerable<string> existingFiles = null)
{
    var ignored = new HashSet<string>(existingFiles ?? Enumerable.Empty<string>(), StringComparer.Ordinal);
    var timeout = TimeSpan.FromMilliseconds(timeoutMs);
    var startTime = DateTime.UtcNow;

    while (DateTime.UtcNow - startTime < timeout)
    {
        // Materialize once: the query hits the file system and is used twice below.
        var completedFiles = Directory.GetFiles(downloadPath)
            .Where(f => !f.EndsWith(".crdownload", StringComparison.Ordinal)
                        && !f.EndsWith(".tmp", StringComparison.Ordinal)
                        && !ignored.Contains(f))
            .ToList();

        if (completedFiles.Count > 0)
        {
            // Return the most recently created file
            return completedFiles.OrderByDescending(f => File.GetCreationTimeUtc(f)).First();
        }

        await Task.Delay(500); // Check every 500ms
    }

    throw new TimeoutException($"Download did not complete within {timeoutMs}ms");
}
Advanced Download Scenarios
Multiple File Downloads
/// <summary>
/// Triggers one download per entry of <paramref name="urlSelectorPairs"/> concurrently,
/// each in its own page of a shared headless browser, then waits for all of them via
/// <c>WaitForMultipleDownloadsAsync</c>.
/// </summary>
/// <param name="urlSelectorPairs">Maps a page URL (key) to the CSS selector to click on it (value).</param>
/// <param name="downloadDirectory">Directory that receives the files; created if missing.</param>
/// <exception cref="ArgumentNullException"><paramref name="urlSelectorPairs"/> is null.</exception>
public static async Task DownloadMultipleFilesAsync(Dictionary<string, string> urlSelectorPairs, string downloadDirectory)
{
    if (urlSelectorPairs == null)
    {
        throw new ArgumentNullException(nameof(urlSelectorPairs));
    }

    if (urlSelectorPairs.Count == 0)
    {
        return; // Nothing to download; avoid launching a browser for no work.
    }

    // Same guarantee as the single-file path: the target directory exists up front.
    Directory.CreateDirectory(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });
    try
    {
        var downloadTasks = urlSelectorPairs.Select(async pair =>
        {
            var page = await browser.NewPageAsync();
            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = downloadDirectory
            });
            await page.GoToAsync(pair.Key);
            await page.ClickAsync(pair.Value);
            // The page is deliberately left open: closing the tab while its download is
            // still in flight may abort the transfer (NOTE(review): confirm for the
            // Chromium revision in use). browser.CloseAsync() below closes every page.
        });

        await Task.WhenAll(downloadTasks);

        // Wait for all downloads to complete
        await WaitForMultipleDownloadsAsync(downloadDirectory, urlSelectorPairs.Count);
    }
    finally
    {
        await browser.CloseAsync();
    }
}
Form-Based Downloads
/// <summary>
/// Opens <paramref name="url"/>, types each value of <paramref name="formData"/> into the
/// <c>input</c> whose <c>name</c> attribute equals the key, clicks the submit input and
/// waits for the resulting download, writing its path to the console.
/// </summary>
/// <param name="url">Page that hosts the form.</param>
/// <param name="formData">Maps input names to the text typed into them.</param>
/// <param name="downloadDirectory">Directory that receives the file; created if missing.</param>
public static async Task DownloadFromFormAsync(string url, Dictionary<string, string> formData, string downloadDirectory)
{
    Directory.CreateDirectory(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });

    // Page creation and CDP setup are inside the try so a failure there still closes the browser.
    try
    {
        var page = await browser.NewPageAsync();
        await page.Client.SendAsync("Page.setDownloadBehavior", new
        {
            behavior = "allow",
            downloadPath = downloadDirectory
        });

        await page.GoToAsync(url);

        // Fill form fields
        foreach (var field in formData)
        {
            // Escape backslashes and single quotes so the name cannot break out of the
            // quoted attribute value in the selector.
            var escapedName = field.Key.Replace("\\", "\\\\").Replace("'", "\\'");
            await page.TypeAsync($"input[name='{escapedName}']", field.Value);
        }

        // Submit form and trigger download
        await page.ClickAsync("input[type='submit']");

        var downloadedFile = await WaitForDownloadAsync(downloadDirectory);
        Console.WriteLine($"Downloaded: {downloadedFile}");
    }
    finally
    {
        await browser.CloseAsync();
    }
}
Download Event Monitoring
For more precise control, you can monitor browser events:
/// <summary>
/// Clicks <paramref name="selector"/> on <paramref name="url"/> and waits for the
/// download to finish by listening to the CDP <c>Browser.downloadWillBegin</c> and
/// <c>Browser.downloadProgress</c> events instead of polling the file system.
/// </summary>
/// <param name="url">Page that contains the download trigger.</param>
/// <param name="selector">CSS selector of the element to click.</param>
/// <param name="downloadDirectory">Directory that receives the file; created if missing.</param>
/// <param name="timeoutMs">
/// Maximum time to wait for the "completed" event, in milliseconds. Pass -1 to wait indefinitely.
/// </param>
/// <exception cref="TimeoutException">The download did not complete within the timeout.</exception>
/// <exception cref="OperationCanceledException">The browser reported the download as canceled.</exception>
public static async Task DownloadWithEventMonitoringAsync(string url, string selector, string downloadDirectory, int timeoutMs = 30000)
{
    Directory.CreateDirectory(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });
    try
    {
        var page = await browser.NewPageAsync();

        // Completed by the event handler; continuations run asynchronously so they never
        // execute inline on the thread that delivers CDP messages.
        var completion = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

        // Subscribe before enabling events so no notification can be missed.
        page.Client.MessageReceived += (sender, e) =>
        {
            if (e.MessageID == "Browser.downloadWillBegin")
            {
                Console.WriteLine("Download started");
            }
            else if (e.MessageID == "Browser.downloadProgress")
            {
                // Handle download progress updates
                var data = e.MessageData.ToObject<dynamic>();
                if (data.state == "completed")
                {
                    Console.WriteLine("Download completed");
                    completion.TrySetResult(true);
                }
                else if (data.state == "canceled")
                {
                    completion.TrySetException(new OperationCanceledException("Download was canceled"));
                }
            }
        };

        // eventsEnabled is required: without it the browser does not emit the
        // downloadWillBegin / downloadProgress events and the wait below never ends.
        await page.Client.SendAsync("Browser.setDownloadBehavior", new
        {
            behavior = "allow",
            downloadPath = downloadDirectory,
            eventsEnabled = true
        });

        await page.GoToAsync(url);
        await page.ClickAsync(selector);

        // Wait for completion, but never longer than the timeout.
        var finished = await Task.WhenAny(completion.Task, Task.Delay(timeoutMs));
        if (finished != completion.Task)
        {
            throw new TimeoutException($"Download did not complete within {timeoutMs}ms");
        }

        await completion.Task; // Propagates the cancellation exception, if any.
    }
    finally
    {
        await browser.CloseAsync();
    }
}
Error Handling and Best Practices
/// <summary>
/// Downloads a file and reports the outcome as a <see cref="DownloadResult"/> instead of
/// throwing: any exception is caught and its message stored in <c>Error</c>.
/// </summary>
/// <param name="url">Page that contains the download trigger. Required.</param>
/// <param name="selector">CSS selector of the element to click. Required.</param>
/// <param name="downloadDirectory">Directory that receives the file; created if missing.</param>
/// <returns>
/// A result with <c>Success</c> and <c>FilePath</c> set on success, or with <c>Error</c>
/// set when validation fails, the element is missing, or an exception occurs.
/// </returns>
public static async Task<DownloadResult> SafeDownloadAsync(string url, string selector, string downloadDirectory)
{
    var result = new DownloadResult();
    try
    {
        // Validate inputs
        if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(selector))
        {
            throw new ArgumentException("URL and selector are required");
        }

        Directory.CreateDirectory(downloadDirectory);

        var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = true,
            Timeout = 30000
        });

        // Inner try/finally: the browser is closed on success, on the early
        // "element not found" return, and when anything below throws.
        try
        {
            var page = await browser.NewPageAsync();

            // Set timeouts
            page.DefaultTimeout = 30000;
            page.DefaultNavigationTimeout = 30000;

            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = downloadDirectory
            });

            await page.GoToAsync(url, new NavigationOptions { WaitUntil = new[] { WaitUntilNavigation.Networkidle0 } });

            // Check if download element exists
            var element = await page.QuerySelectorAsync(selector);
            if (element == null)
            {
                result.Error = $"Download element not found: {selector}";
                return result;
            }

            await page.ClickAsync(selector);
            result.FilePath = await WaitForDownloadAsync(downloadDirectory);
            result.Success = true;
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
    catch (Exception ex)
    {
        result.Error = ex.Message;
        result.Success = false;
    }

    return result;
}
/// <summary>
/// Outcome of a download attempt: a success flag plus either the file path or an error message.
/// </summary>
public class DownloadResult
{
    /// <summary>True when the download finished and <see cref="FilePath"/> was set.</summary>
    public bool Success { get; set; }

    /// <summary>Path of the downloaded file; left unset when the download failed.</summary>
    public string FilePath { get; set; }

    /// <summary>Failure description; left unset when the download succeeded.</summary>
    public string Error { get; set; }
}
Key Points
- Always configure download behavior before navigating to the download page
- Use proper download monitoring instead of arbitrary delays
- Handle timeouts gracefully for large files or slow networks
- Validate download completion by checking file existence and size
- Clean up resources by properly closing browser instances
- Consider concurrent downloads for better performance with multiple files
This approach provides robust file download handling suitable for production environments while maintaining good performance and error handling.