What are the options for handling timeouts and delays in Puppeteer-Sharp?

Puppeteer-Sharp is a .NET port of the popular Node.js Puppeteer library, providing a high-level API to control headless Chrome or Chromium browsers. Effective timeout and delay management is essential for building robust web scraping and automation applications that can handle varying network conditions and page load times.

Overview of Timeout Types

Puppeteer-Sharp offers several timeout mechanisms to handle different scenarios:

  • Navigation Timeouts: Control how long to wait for page loads
  • Element Wait Timeouts: Set limits for element appearance/disappearance
  • Method-Specific Timeouts: Configure timeouts for individual operations
  • Global Timeouts: Set default timeouts for all operations
  • Custom Delays: Implement fixed or conditional waits

1. Default and Navigation Timeouts

Setting Default Timeouts

Configure per-page timeout defaults that apply to all navigation and wait operations on that page:

using PuppeteerSharp;

// Launch a headless browser. `await using` disposes the browser when the
// enclosing scope ends, so it is not left running if a later step throws.
await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
{
    Headless = true,
    DefaultViewport = new ViewPortOptions { Width = 1920, Height = 1080 }
});

var page = await browser.NewPageAsync();

// Set default timeout for all operations on this page (30 seconds)
page.DefaultTimeout = 30000;

// Set specific navigation timeout (45 seconds); for navigation calls this
// takes priority over DefaultTimeout
page.DefaultNavigationTimeout = 45000;

// Note: both defaults are plain properties. Puppeteer-Sharp has no
// SetDefaultNavigationTimeoutAsync-style setter method; assign the property.

Navigation with Custom Timeouts

Handle page navigation with specific timeout requirements:

try
{
    // Navigate with a per-call timeout that overrides the page defaults
    await page.GoToAsync("https://example.com", new NavigationOptions
    {
        Timeout = 60000, // 60 seconds
        WaitUntil = new[] { WaitUntilNavigation.DOMContentLoaded }
    });

    Console.WriteLine("Page loaded successfully");
}
catch (NavigationException ex)
{
    // GoToAsync reports navigation failures, including an exceeded timeout,
    // as NavigationException rather than System.TimeoutException.
    Console.WriteLine($"Navigation failed: {ex.Message}");
}

2. Element Wait Functions with Timeouts

Waiting for Selectors

Wait for elements to appear with configurable timeouts:

// Basic selector wait with timeout
try
{
    var element = await page.WaitForSelectorAsync("#dynamic-content",
        new WaitForSelectorOptions
        {
            Timeout = 10000, // 10 seconds
            Visible = true   // Wait until element is visible
        });

    // Element found, proceed with interaction
    await element.ClickAsync();
}
catch (WaitTaskTimeoutException)
{
    // WaitForSelectorAsync signals an expired wait with WaitTaskTimeoutException,
    // which is not a System.TimeoutException.
    Console.WriteLine("Element not found within timeout period");
}

// Wait for multiple possible selectors: try each candidate in order and click
// the first one that appears within its 3-second window.
var selectors = new[] { "#submit-btn", ".submit-button", "[type='submit']" };
foreach (var selector in selectors)
{
    try
    {
        var button = await page.WaitForSelectorAsync(selector,
            new WaitForSelectorOptions { Timeout = 3000 });
        if (button != null)
        {
            await button.ClickAsync();
            break;
        }
    }
    catch (WaitTaskTimeoutException)
    {
        // This candidate did not appear in time; move on to the next selector.
        // (Catching System.TimeoutException here would miss the wait timeout
        // and abort the loop on the first miss.)
    }
}

XPath and Advanced Waits

// Wait for XPath with timeout.
// The "xpath/" prefix makes the selector APIs treat the remainder as an XPath
// expression.
var resultSelector = "xpath/" + "//div[contains(@class, 'result')]";
try
{
    // The wait resolves with a single element handle (the first match) ...
    await page.WaitForSelectorAsync(resultSelector,
        new WaitForSelectorOptions { Timeout = 15000 });

    // ... so query again to collect every match before counting them.
    var elements = await page.QuerySelectorAllAsync(resultSelector);

    Console.WriteLine($"Found {elements.Length} result elements");
}
catch (WaitTaskTimeoutException)
{
    Console.WriteLine("XPath elements not found");
}

// Block for up to 20 seconds until the loading spinner is hidden/removed
var spinnerGoneOptions = new WaitForSelectorOptions
{
    Timeout = 20000,
    Hidden = true // Resolve when the element is no longer shown
};
await page.WaitForSelectorAsync("#loading-spinner", spinnerGoneOptions);

3. Conditional and Function-Based Waits

JavaScript Function Waits

Wait for custom JavaScript conditions to be met:

// AJAX idle check: resolves once jQuery is present and reports no active requests
var ajaxIdleOptions = new WaitForFunctionOptions { Timeout = 30000 };
await page.WaitForFunctionAsync(@"
    () => {
        return window.jQuery && window.jQuery.active === 0;
    }", ajaxIdleOptions);

// Application readiness check, re-evaluated every 500ms for up to 25 seconds
var appReadyOptions = new WaitForFunctionOptions
{
    PollingInterval = 500,
    Timeout = 25000
};
await page.WaitForFunctionAsync(@"
    () => {
        return window.appReady === true && 
               document.readyState === 'complete' &&
               !document.querySelector('.loading');
    }", appReadyOptions);

// Item-count check: the expected count is passed to the page function as its argument
const int expectedProductCount = 10;
var productCountOptions = new WaitForFunctionOptions { Timeout = 15000 };
await page.WaitForFunctionAsync(@"
    (expectedCount) => {
        return document.querySelectorAll('.product-item').length >= expectedCount;
    }", productCountOptions, expectedProductCount);

4. Network-Based Waiting Strategies

Network Idle Waits

Wait for network activity to settle before proceeding:

// Strict idle: navigation completes once there have been no requests for 500ms
var strictIdleOptions = new NavigationOptions
{
    Timeout = 30000,
    WaitUntil = new[] { WaitUntilNavigation.Networkidle0 }
};
await page.GoToAsync("https://spa-example.com", strictIdleOptions);

// Relaxed idle: tolerates up to 2 in-flight requests during the 500ms window
var relaxedIdleOptions = new NavigationOptions
{
    Timeout = 45000,
    WaitUntil = new[] { WaitUntilNavigation.Networkidle2 }
};
await page.GoToAsync("https://dynamic-site.com", relaxedIdleOptions);

// Combined: require both DOMContentLoaded and strict network idle
var combinedOptions = new NavigationOptions
{
    Timeout = 60000,
    WaitUntil = new[]
    {
        WaitUntilNavigation.DOMContentLoaded,
        WaitUntilNavigation.Networkidle0
    }
};
await page.GoToAsync("https://complex-app.com", combinedOptions);

5. Method-Specific Timeouts

Interaction Timeouts

Configure timeouts for user interactions:

// ClickOptions and TypeOptions have no Timeout property, and FocusAsync /
// SelectAsync take no options object. To put a time limit on an interaction,
// wait for the target element with a timeout first, then act on the handle.

// Click with timeout
try
{
    var submitButton = await page.WaitForSelectorAsync("#submit-button",
        new WaitForSelectorOptions { Timeout = 5000, Visible = true });

    await submitButton.ClickAsync(new ClickOptions
    {
        Delay = 100 // Hold the button for 100ms between mousedown and mouseup
    });
}
catch (WaitTaskTimeoutException)
{
    Console.WriteLine("Click operation timed out");
}

// Type text with timeout
var searchInput = await page.WaitForSelectorAsync("#search-input",
    new WaitForSelectorOptions { Timeout = 10000 });
await searchInput.TypeAsync("search query", new TypeOptions
{
    Delay = 50 // 50ms delay between keystrokes
});

// Focus with timeout
var emailField = await page.WaitForSelectorAsync("#email-field",
    new WaitForSelectorOptions { Timeout = 3000 });
await emailField.FocusAsync();

// Select option with timeout
var countrySelect = await page.WaitForSelectorAsync("#country-select",
    new WaitForSelectorOptions { Timeout = 5000 });
await countrySelect.SelectAsync("US");

6. Custom Delay Strategies

Fixed and Dynamic Delays

Implement custom timing strategies:

// Fixed pause of two seconds
await Task.Delay(TimeSpan.FromSeconds(2));

// Randomised pause (1000ms inclusive to 3000ms exclusive) to simulate human behavior
var jitterSource = new Random();
await Task.Delay(jitterSource.Next(1000, 3000));

// Progressive delay with retry logic.
// Waits for `selector` up to `maxRetries` times (5 seconds per attempt), pausing
// 1s, 2s, 4s, ... between failed attempts. Returns true as soon as the selector
// appears, or false when every attempt has timed out.
async Task<bool> WaitForElementWithRetry(IPage page, string selector, int maxRetries = 3)
{
    for (int i = 0; i < maxRetries; i++)
    {
        try
        {
            await page.WaitForSelectorAsync(selector, new WaitForSelectorOptions { Timeout = 5000 });
            return true;
        }
        catch (WaitTaskTimeoutException)
        {
            // An expired selector wait surfaces as WaitTaskTimeoutException;
            // catching System.TimeoutException would let it escape and the
            // retry loop would never run.
            if (i < maxRetries - 1) // No point sleeping after the final attempt
            {
                var backoffDelay = (int)Math.Pow(2, i) * 1000; // Exponential backoff
                await Task.Delay(backoffDelay);
                Console.WriteLine($"Retry {i + 1}/{maxRetries} after {backoffDelay}ms delay");
            }
        }
    }
    return false;
}

7. CancellationToken Integration

Advanced Timeout Control

Use CancellationTokens for fine-grained timeout control:

using System.Threading;

// Create cancellation token with timeout: one 30-second budget shared by all
// of the steps below
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));

try
{
    // Puppeteer-Sharp's page methods do not accept a CancellationToken, so each
    // await is bounded with Task.WaitAsync (.NET 6+). Cancellation ends the
    // wait on our side; it does not abort the operation inside the browser.
    await page.GoToAsync("https://slow-loading-site.com",
            new NavigationOptions { WaitUntil = new[] { WaitUntilNavigation.DOMContentLoaded } })
        .WaitAsync(cts.Token);

    await page.WaitForSelectorAsync("#content",
            new WaitForSelectorOptions { Timeout = 15000 })
        .WaitAsync(cts.Token);

    var result = await page.EvaluateExpressionAsync<string>("document.title")
        .WaitAsync(cts.Token);

    Console.WriteLine($"Page title: {result}");
}
catch (OperationCanceledException)
{
    // Raised by WaitAsync when the shared 30-second budget runs out
    Console.WriteLine("Operation was cancelled due to timeout");
}
catch (Exception ex) when (ex is NavigationException || ex is WaitTaskTimeoutException)
{
    // Raised by Puppeteer-Sharp itself when a per-operation timeout expires first
    Console.WriteLine($"Specific timeout occurred: {ex.Message}");
}

8. Best Practices and Error Handling

Comprehensive Timeout Strategy

/// <summary>
/// Wraps an <see cref="IPage"/> with navigation and element-wait helpers that
/// report timeouts through their return value instead of throwing.
/// </summary>
public class RobustPageHandler
{
    private readonly IPage _page;
    private readonly int _defaultTimeout;

    /// <summary>
    /// Creates the handler and applies the timeout defaults to the page.
    /// </summary>
    /// <param name="page">Page to operate on.</param>
    /// <param name="defaultTimeoutMs">Default timeout in milliseconds for the helpers and for the page.</param>
    /// <exception cref="ArgumentNullException"><paramref name="page"/> is null.</exception>
    public RobustPageHandler(IPage page, int defaultTimeoutMs = 30000)
    {
        _page = page ?? throw new ArgumentNullException(nameof(page));
        _defaultTimeout = defaultTimeoutMs;

        // Set page defaults
        _page.DefaultTimeout = defaultTimeoutMs;
        _page.DefaultNavigationTimeout = defaultTimeoutMs + 15000; // Slightly longer for navigation
    }

    /// <summary>
    /// Navigates to <paramref name="url"/>, then waits up to 10 seconds for
    /// <c>document.readyState</c> to become <c>complete</c>.
    /// </summary>
    /// <param name="url">Address to open.</param>
    /// <param name="customTimeout">Navigation timeout in milliseconds; the handler default is used when null.</param>
    /// <returns>True when both steps succeed; false when either fails or times out.</returns>
    public async Task<bool> SafeNavigateAsync(string url, int? customTimeout = null)
    {
        var timeout = customTimeout ?? _defaultTimeout;

        try
        {
            await _page.GoToAsync(url, new NavigationOptions
            {
                Timeout = timeout,
                WaitUntil = new[] { WaitUntilNavigation.DOMContentLoaded }
            });

            // Wait for basic page stability. The condition is a bare expression,
            // not a function, so it must go through WaitForExpressionAsync;
            // WaitForFunctionAsync would try to invoke the expression's result.
            await _page.WaitForExpressionAsync("document.readyState === 'complete'",
                new WaitForFunctionOptions { Timeout = 10000 });

            return true;
        }
        catch (Exception ex) when (ex is TimeoutException
            || ex is NavigationException
            || ex is WaitTaskTimeoutException) // thrown by the readyState wait
        {
            Console.WriteLine($"Navigation failed: {ex.Message}");
            return false;
        }
    }

    /// <summary>
    /// Waits for an element matching <paramref name="selector"/>.
    /// </summary>
    /// <param name="selector">CSS selector to wait for.</param>
    /// <param name="mustBeVisible">When true, the element must also be visible.</param>
    /// <param name="customTimeout">Wait timeout in milliseconds; the handler default is used when null.</param>
    /// <returns>The element handle, or null when the wait times out.</returns>
    public async Task<IElementHandle> SafeWaitForElementAsync(string selector,
        bool mustBeVisible = true, int? customTimeout = null)
    {
        var timeout = customTimeout ?? _defaultTimeout;

        try
        {
            return await _page.WaitForSelectorAsync(selector, new WaitForSelectorOptions
            {
                Timeout = timeout,
                Visible = mustBeVisible
            });
        }
        catch (Exception ex) when (ex is WaitTaskTimeoutException || ex is TimeoutException)
        {
            // Selector waits time out with WaitTaskTimeoutException;
            // TimeoutException is still accepted so nothing previously
            // handled here starts escaping.
            Console.WriteLine($"Element '{selector}' not found within {timeout}ms");
            return null;
        }
    }
}

// Usage example: navigate, then click the target only if both steps succeeded
var robustHandler = new RobustPageHandler(page, 25000);

if (await robustHandler.SafeNavigateAsync("https://example.com"))
{
    var importantContent = await robustHandler.SafeWaitForElementAsync("#important-content");

    // A null handle means the wait timed out, so there is nothing to click
    if (importantContent != null)
    {
        await importantContent.ClickAsync();
    }
}

Common Timeout Scenarios

E-commerce Site Scraping

// Open the product listing page
await page.GoToAsync("https://shop.example.com/products");

// Give the product grid up to 15 seconds to appear
var gridWaitOptions = new WaitForSelectorOptions { Timeout = 15000 };
await page.WaitForSelectorAsync(".product-grid", gridWaitOptions);

// Then allow up to 30 seconds for every product image to report `complete`
var imageWaitOptions = new WaitForFunctionOptions { Timeout = 30000 };
await page.WaitForFunctionAsync(@"
    () => {
        const images = document.querySelectorAll('.product-image img');
        return Array.from(images).every(img => img.complete);
    }", imageWaitOptions);

Form Submission Handling

// Fill and submit form with proper timing
await page.TypeAsync("#email", "user@example.com");
await Task.Delay(100); // Small delay between fields

await page.TypeAsync("#password", "password123");
await Task.Delay(100);

await page.ClickAsync("#submit-button");

// Wait for submission result
try
{
    await page.WaitForSelectorAsync(".success-message",
        new WaitForSelectorOptions { Timeout = 10000 });
    Console.WriteLine("Form submitted successfully");
}
catch (WaitTaskTimeoutException)
{
    // No success message within 10 seconds (selector waits time out with
    // WaitTaskTimeoutException): check for an error message instead
    var errorElement = await page.QuerySelectorAsync(".error-message");
    if (errorElement != null)
    {
        // GetPropertyAsync yields a JS handle, not a string. Extract the value
        // explicitly; interpolating the handle would print the handle itself.
        var textHandle = await errorElement.GetPropertyAsync("textContent");
        var errorText = await textHandle.JsonValueAsync<string>();
        Console.WriteLine($"Form error: {errorText}");
    }
}

Proper timeout and delay management in Puppeteer-Sharp ensures your automation scripts are reliable, can handle various network conditions, and provide a better user experience by avoiding unnecessary failures due to timing issues.

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon