How do I set custom headers for a request in Colly?

In Colly, you can set custom headers for HTTP requests using the OnRequest callback function. This allows you to modify request headers before they're sent to the target server.

Basic Header Setting

Use the OnRequest callback to set headers on every request:

package main

import (
    "fmt"
    "log"

    "github.com/gocolly/colly/v2"
)

// main fetches https://httpbin.org/headers with a collector that attaches
// custom headers to every outgoing request, and prints the text of any
// <title> element found in the response.
func main() {
    c := colly.NewCollector()

    // Set custom headers for all requests
    c.OnRequest(func(r *colly.Request) {
        r.Headers.Set("Custom-Header", "MyValue")
        r.Headers.Set("User-Agent", "MyBot/1.0")
        r.Headers.Set("Authorization", "Bearer your-token")
    })

    c.OnHTML("title", func(e *colly.HTMLElement) {
        fmt.Println("Title:", e.Text)
    })

    // Visit returns an error; report it rather than exiting silently
    // when the request could not be made.
    if err := c.Visit("https://httpbin.org/headers"); err != nil {
        log.Fatalf("Visit failed: %v", err)
    }
}

Common Header Use Cases

Setting User-Agent

c.OnRequest(func(r *colly.Request) {
    r.Headers.Set("User-Agent", "Mozilla/5.0 (compatible; MyBot/1.0)")
})

Adding Authentication Headers

// Attach API credentials to every outgoing request.
c.OnRequest(func(req *colly.Request) {
    hdr := req.Headers
    hdr.Set("Authorization", "Bearer your-api-token")
    hdr.Set("X-API-Key", "your-api-key")
})

Setting Content Type for POST Requests

// Declare a JSON body on POST requests; other methods are left untouched.
c.OnRequest(func(req *colly.Request) {
    if req.Method != "POST" {
        return
    }
    req.Headers.Set("Content-Type", "application/json")
})

Dynamic Headers Based on URL

You can set different headers based on the request URL:

// Choose credentials per target host, then apply the shared User-Agent.
c.OnRequest(func(r *colly.Request) {
    // Compare the exact hostname instead of searching for a substring:
    // a substring match would also send these secrets to look-alike hosts
    // such as "api.example.com.attacker.net". Hostname() drops any port,
    // so "api.example.com:8443" still matches, and lower-casing makes the
    // comparison case-insensitive, as host names are.
    switch strings.ToLower(r.URL.Hostname()) {
    case "api.example.com":
        r.Headers.Set("Authorization", "Bearer api-token")
    case "secure.example.com":
        r.Headers.Set("X-Custom-Auth", "secret-key")
    }

    // Always set User-Agent
    r.Headers.Set("User-Agent", "MyBot/1.0")
})

Managing Multiple Headers

// Apply a fixed set of headers to every outgoing request.
c.OnRequest(func(req *colly.Request) {
    // Name/value pairs, listed in the order they are applied.
    defaults := [...]struct{ name, value string }{
        {"User-Agent", "MyBot/1.0"},
        {"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
        {"Accept-Language", "en-US,en;q=0.5"},
        {"Cache-Control", "no-cache"},
        {"Pragma", "no-cache"},
    }

    for _, h := range defaults {
        req.Headers.Set(h.name, h.value)
    }
})

Headers with Context Data

Pass data through the request context to set headers conditionally:

// Add a bearer token when the request context carries one.
c.OnRequest(func(req *colly.Request) {
    token := req.Ctx.Get("auth_token")
    if token == "" {
        // No token stored under "auth_token": leave the headers as they are.
        return
    }
    req.Headers.Set("Authorization", "Bearer "+token)
})

// Set context data when visiting
ctx := colly.NewContext()
ctx.Put("auth_token", "your-token-here")
c.Request("GET", "https://api.example.com/data", nil, ctx, nil)

Header Manipulation Methods

Besides Set(), you can use other header methods:

c.OnRequest(func(req *colly.Request) {
    hdr := req.Headers

    // Set replaces whatever value is already stored under the name.
    hdr.Set("Custom-Header", "value")

    // Add appends a value and keeps the existing ones.
    hdr.Add("Accept-Encoding", "gzip")

    // Del removes the header.
    hdr.Del("Unwanted-Header")
})

Complete Example with Error Handling

package main

import (
    "fmt"
    "log"
    "github.com/gocolly/colly/v2"
    "github.com/gocolly/colly/v2/debug"
)

// main fetches https://httpbin.org/headers with the log debugger attached,
// sends a browser-like set of headers, and reports the response status,
// the text of any <title> element, and any error encountered.
func main() {
    c := colly.NewCollector(
        colly.Debugger(&debug.LogDebugger{}),
    )

    // Set comprehensive headers
    c.OnRequest(func(r *colly.Request) {
        r.Headers.Set("User-Agent", "Mozilla/5.0 (compatible; MyBot/1.0)")
        r.Headers.Set("Accept", "text/html,application/xhtml+xml")
        r.Headers.Set("Accept-Language", "en-US,en;q=0.9")
        r.Headers.Set("Cache-Control", "no-cache")

        fmt.Printf("Visiting %s with headers: %v\n", r.URL, r.Headers)
    })

    c.OnResponse(func(r *colly.Response) {
        fmt.Printf("Response status: %d\n", r.StatusCode)
    })

    c.OnHTML("title", func(e *colly.HTMLElement) {
        fmt.Println("Page title:", e.Text)
    })

    c.OnError(func(r *colly.Response, err error) {
        log.Printf("Error visiting %s: %v", r.Request.URL, err)
    })

    // log.Fatal formats its arguments like fmt.Print, which inserts a space
    // only between operands when neither is a string, so the message and
    // the error would run together. Fatalf keeps them separated.
    if err := c.Visit("https://httpbin.org/headers"); err != nil {
        log.Fatalf("Visit failed: %v", err)
    }
}

Best Practices

  1. Always set User-Agent: Many websites block requests without proper User-Agent headers
  2. Use realistic headers: Mimic real browser headers to avoid detection
  3. Handle different domains: Set appropriate headers for different APIs or websites
  4. Include error handling: Use the OnError callback to debug header-related issues
  5. Test headers: Use services like httpbin.org to verify your headers are sent correctly

The OnRequest callback is called before every request, making it the perfect place to set, modify, or conditionally apply headers based on your scraping requirements.

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering, and a built-in HTML parser for web scraping
Icon