How do I handle file downloads with Puppeteer-Sharp?

Handling file downloads with Puppeteer-Sharp requires configuring the browser's download behavior and implementing proper download monitoring. This guide covers everything from basic setup to production-ready download handling.

Installation

First, install Puppeteer-Sharp in your .NET project:

dotnet add package PuppeteerSharp

Basic Download Setup

1. Configure Browser for Downloads

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using PuppeteerSharp;

/// <summary>
/// Downloads a file by driving a headless browser: opens a page, clicks the
/// element that triggers the download and waits for the file to appear on disk.
/// </summary>
public class DownloadHandler
{
    /// <summary>
    /// Navigates to <paramref name="url"/>, clicks <paramref name="selector"/> and
    /// waits until the resulting download shows up in <paramref name="downloadDirectory"/>.
    /// </summary>
    /// <param name="url">Page that contains the download trigger.</param>
    /// <param name="selector">CSS selector of the element whose click starts the download.</param>
    /// <param name="downloadDirectory">Directory the browser saves into; created if missing.</param>
    /// <returns>The path reported by <c>WaitForDownloadAsync</c> for the downloaded file.</returns>
    public static async Task<string> DownloadFileAsync(string url, string selector, string downloadDirectory)
    {
        // Ensure download directory exists
        Directory.CreateDirectory(downloadDirectory);

        // Download Chromium if needed
        await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

        // NOTE(review): --disable-web-security switches off the same-origin policy and
        // does not look necessary for downloads; confirm it is really needed.
        var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = true,
            Args = new[] { "--disable-web-security", "--disable-features=VizDisplayCompositor" }
        });

        // Everything after the launch lives inside try/finally so the browser process
        // is closed even when page creation or the download configuration fails.
        try
        {
            var page = await browser.NewPageAsync();

            // Configure download behavior. An absolute path is sent so the target
            // location does not depend on the browser process's working directory.
            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = Path.GetFullPath(downloadDirectory)
            });

            // Navigate and trigger download
            await page.GoToAsync(url);
            await page.ClickAsync(selector);

            // Wait for download to complete
            return await WaitForDownloadAsync(downloadDirectory);
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
}

2. Monitor Download Completion

Instead of sleeping for a fixed Task.Delay() and hoping the download has finished, poll the download directory until a completed file appears (or a timeout expires):

/// <summary>
/// Polls <paramref name="downloadPath"/> until it contains at least one file and no
/// partially written download (<c>.crdownload</c> / <c>.tmp</c>), then returns the
/// most recently created file.
/// </summary>
/// <param name="downloadPath">Directory to watch; it must already exist.</param>
/// <param name="timeoutMs">Maximum time to wait, in milliseconds.</param>
/// <returns>Path of the most recently created file in the directory.</returns>
/// <exception cref="TimeoutException">
/// No completed download was observed before the timeout elapsed. A leftover partial
/// file from an earlier, aborted download also keeps this method waiting.
/// </exception>
private static async Task<string> WaitForDownloadAsync(string downloadPath, int timeoutMs = 30000)
{
    var deadline = DateTime.UtcNow + TimeSpan.FromMilliseconds(timeoutMs);
    var pollInterval = TimeSpan.FromMilliseconds(500);

    while (true)
    {
        var files = Directory.GetFiles(downloadPath);

        // While a partial file exists a download is still being written, so any
        // completed file next to it may be a stale one from an earlier run.
        var downloadInProgress = files.Any(f =>
            f.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
            f.EndsWith(".tmp", StringComparison.OrdinalIgnoreCase));

        if (!downloadInProgress && files.Length > 0)
        {
            // Return the most recently created file
            return files.OrderByDescending(f => File.GetCreationTime(f)).First();
        }

        var remaining = deadline - DateTime.UtcNow;
        if (remaining <= TimeSpan.Zero)
        {
            break;
        }

        // Never sleep past the deadline, so the timeout is honored precisely.
        await Task.Delay(remaining < pollInterval ? remaining : pollInterval);
    }

    throw new TimeoutException($"Download did not complete within {timeoutMs}ms");
}

Advanced Download Scenarios

Multiple File Downloads

/// <summary>
/// Triggers one download per entry of <paramref name="urlSelectorPairs"/> in parallel,
/// all saved into <paramref name="downloadDirectory"/>, and waits for them to finish.
/// </summary>
/// <param name="urlSelectorPairs">Maps a page URL (key) to the CSS selector (value) to click on that page.</param>
/// <param name="downloadDirectory">Directory the browser saves into; created if missing.</param>
/// <exception cref="ArgumentNullException"><paramref name="urlSelectorPairs"/> is null.</exception>
public static async Task DownloadMultipleFilesAsync(Dictionary<string, string> urlSelectorPairs, string downloadDirectory)
{
    if (urlSelectorPairs == null)
    {
        throw new ArgumentNullException(nameof(urlSelectorPairs));
    }

    // Nothing to download: avoid launching a browser at all.
    if (urlSelectorPairs.Count == 0)
    {
        return;
    }

    Directory.CreateDirectory(downloadDirectory);

    // An absolute path is sent so the target location does not depend on the
    // browser process's working directory.
    var absoluteDownloadPath = Path.GetFullPath(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });

    try
    {
        var triggerTasks = urlSelectorPairs.Select(async pair =>
        {
            var page = await browser.NewPageAsync();

            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = absoluteDownloadPath
            });

            await page.GoToAsync(pair.Key);
            await page.ClickAsync(pair.Value);

            // The page is intentionally left open: closing it right after the click
            // can abort a download that has not started or finished yet. All pages
            // go away when the browser is closed in the finally block.
        }).ToList();

        await Task.WhenAll(triggerTasks);

        // Wait for all downloads to complete
        await WaitForMultipleDownloadsAsync(downloadDirectory, urlSelectorPairs.Count);
    }
    finally
    {
        await browser.CloseAsync();
    }
}

Form-Based Downloads

/// <summary>
/// Opens <paramref name="url"/>, types each value of <paramref name="formData"/> into the
/// input whose <c>name</c> attribute equals the key, submits the form and waits for
/// the resulting download.
/// </summary>
/// <param name="url">Page that contains the form.</param>
/// <param name="formData">Maps input names to the text to type into them.</param>
/// <param name="downloadDirectory">Directory the browser saves into; created if missing.</param>
public static async Task DownloadFromFormAsync(string url, Dictionary<string, string> formData, string downloadDirectory)
{
    Directory.CreateDirectory(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });

    // Page creation and configuration are inside try/finally so the browser is
    // closed even if they fail.
    try
    {
        var page = await browser.NewPageAsync();

        // An absolute path is sent so the target location does not depend on the
        // browser process's working directory.
        await page.Client.SendAsync("Page.setDownloadBehavior", new
        {
            behavior = "allow",
            downloadPath = Path.GetFullPath(downloadDirectory)
        });

        await page.GoToAsync(url);

        // Fill form fields
        foreach (var field in formData)
        {
            // Escape backslashes and quotes so a field name cannot break out of the
            // quoted attribute value in the CSS selector.
            var escapedName = field.Key.Replace("\\", "\\\\").Replace("'", "\\'");
            await page.TypeAsync($"input[name='{escapedName}']", field.Value);
        }

        // Submit form and trigger download
        await page.ClickAsync("input[type='submit']");

        var downloadedFile = await WaitForDownloadAsync(downloadDirectory);
        Console.WriteLine($"Downloaded: {downloadedFile}");
    }
    finally
    {
        await browser.CloseAsync();
    }
}

Download Event Monitoring

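For more precise control, you can subscribe to the Chrome DevTools Protocol download events (Browser.downloadWillBegin and Browser.downloadProgress) instead of polling the file system: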

/// <summary>
/// Clicks <paramref name="selector"/> on <paramref name="url"/> and waits for the download to
/// finish by listening to the <c>Browser.downloadWillBegin</c> and
/// <c>Browser.downloadProgress</c> protocol events.
/// </summary>
/// <param name="url">Page that contains the download trigger.</param>
/// <param name="selector">CSS selector of the element whose click starts the download.</param>
/// <param name="downloadDirectory">Directory the browser saves into; created if missing.</param>
/// <param name="timeoutMs">Maximum time to wait for the "completed" event, in milliseconds.</param>
/// <exception cref="TimeoutException">No "completed" progress event arrived within <paramref name="timeoutMs"/>.</exception>
/// <exception cref="IOException">The browser reported the download as canceled.</exception>
public static async Task DownloadWithEventMonitoringAsync(string url, string selector, string downloadDirectory, int timeoutMs = 30000)
{
    Directory.CreateDirectory(downloadDirectory);

    var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });

    try
    {
        var page = await browser.NewPageAsync();

        // Completed by the event handler; replaces polling flags that were shared
        // between the handler and the waiting code.
        var completion = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

        // Listen for download progress. Subscribed before events are enabled so no
        // notification can be missed.
        page.Client.MessageReceived += (sender, e) =>
        {
            if (e.MessageID == "Browser.downloadWillBegin")
            {
                Console.WriteLine("Download started");
            }
            else if (e.MessageID == "Browser.downloadProgress")
            {
                // Handle download progress updates
                var data = e.MessageData.ToObject<dynamic>();
                if (data.state == "completed")
                {
                    Console.WriteLine("Download completed");
                    completion.TrySetResult(true);
                }
                else if (data.state == "canceled")
                {
                    // Without this the caller would wait for a completion that never comes.
                    completion.TrySetException(new IOException("Download was canceled"));
                }
            }
        };

        // eventsEnabled must be true, otherwise the browser does not emit the
        // downloadWillBegin/downloadProgress events this method waits for.
        await page.Client.SendAsync("Browser.setDownloadBehavior", new
        {
            behavior = "allow",
            downloadPath = Path.GetFullPath(downloadDirectory),
            eventsEnabled = true
        });

        await page.GoToAsync(url);
        await page.ClickAsync(selector);

        // Wait for the download to complete, but never indefinitely.
        var finished = await Task.WhenAny(completion.Task, Task.Delay(timeoutMs));
        if (finished != completion.Task)
        {
            throw new TimeoutException($"Download did not complete within {timeoutMs}ms");
        }

        // Re-await to surface a cancellation reported by the handler.
        await completion.Task;
    }
    finally
    {
        await browser.CloseAsync();
    }
}

Error Handling and Best Practices

/// <summary>
/// Performs a click-triggered download and reports the outcome as a
/// <see cref="DownloadResult"/> instead of throwing.
/// </summary>
/// <param name="url">Page that contains the download trigger.</param>
/// <param name="selector">CSS selector of the element whose click starts the download.</param>
/// <param name="downloadDirectory">Directory the browser saves into; created if missing.</param>
/// <returns>
/// A result with <c>Success</c> and <c>FilePath</c> set when the download finished,
/// otherwise with <c>Error</c> describing what went wrong.
/// </returns>
public static async Task<DownloadResult> SafeDownloadAsync(string url, string selector, string downloadDirectory)
{
    var result = new DownloadResult();

    try
    {
        // Validate inputs
        if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(selector))
        {
            throw new ArgumentException("URL and selector are required");
        }

        Directory.CreateDirectory(downloadDirectory);

        var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = true,
            Timeout = 30000
        });

        // Inner try/finally guarantees the browser is closed on every exit path:
        // success, the early "element not found" return, and any exception.
        try
        {
            var page = await browser.NewPageAsync();

            // Set timeouts
            page.DefaultTimeout = 30000;
            page.DefaultNavigationTimeout = 30000;

            // An absolute path is sent so the target location does not depend on the
            // browser process's working directory.
            await page.Client.SendAsync("Page.setDownloadBehavior", new
            {
                behavior = "allow",
                downloadPath = Path.GetFullPath(downloadDirectory)
            });

            await page.GoToAsync(url, new NavigationOptions { WaitUntil = new[] { WaitUntilNavigation.Networkidle0 } });

            // Check if download element exists
            var element = await page.QuerySelectorAsync(selector);
            if (element == null)
            {
                result.Error = $"Download element not found: {selector}";
                return result;
            }

            await page.ClickAsync(selector);

            result.FilePath = await WaitForDownloadAsync(downloadDirectory);
            result.Success = true;
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
    catch (Exception ex)
    {
        // Any failure, including invalid arguments, is reported through the result.
        result.Error = ex.Message;
        result.Success = false;
    }

    return result;
}

/// <summary>
/// Outcome of a download attempt: a success flag plus either the saved file's
/// path or an error description.
/// </summary>
public class DownloadResult
{
    /// <summary>True when the download finished; false otherwise.</summary>
    public bool Success
    {
        get;
        set;
    }

    /// <summary>Path of the downloaded file; null until one has been recorded.</summary>
    public string FilePath
    {
        get;
        set;
    }

    /// <summary>Description of the failure; null when none has been recorded.</summary>
    public string Error
    {
        get;
        set;
    }
}

Key Points

- Always configure download behavior before navigating to the download page.
- Use proper download monitoring instead of arbitrary delays.
- Handle timeouts gracefully for large files or slow networks.
- Validate download completion by checking file existence and size.
- Clean up resources by properly closing browser instances.
- Consider concurrent downloads for better performance with multiple files.

This approach provides robust file download handling suitable for production environments while maintaining good performance and error handling.

Table of contents

How do I handle file downloads with Puppeteer-Sharp?

Installation

Basic Download Setup

1. Configure Browser for Downloads

2. Monitor Download Completion

Advanced Download Scenarios

Multiple File Downloads

Form-Based Downloads

Download Event Monitoring

Error Handling and Best Practices

Key Points

Try WebScraping.AI for Your Web Scraping Needs

Key Features:

Getting Started:

Related Questions

How can I use selectors to find elements in Puppeteer-Sharp?

How can I set custom headers for requests in Puppeteer-Sharp?

How does Puppeteer-Sharp deal with web security features like CSP?

Get Started Now