How do I process compressed HTTP responses in Go?

Processing compressed HTTP responses in Go is essential for efficient web scraping and API consumption. Compression can reduce response sizes by 60-80%, significantly improving performance and reducing bandwidth costs.

Understanding HTTP Compression

HTTP compression uses algorithms like gzip, deflate, and br (Brotli) to compress response bodies. The client indicates support via the Accept-Encoding header, and the server responds with the Content-Encoding header specifying the compression used.

Automatic Compression Handling

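Go's net/http package transparently requests and decompresses gzip when you use http.Get() or http.DefaultClient, as long as you do not set the Accept-Encoding header yourself. When it does so, it removes Content-Encoding from the response headers and sets resp.Uncompressed to true: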

package main

import (
    "fmt"
    "io"
    "net/http"
)

// main fetches a gzip-compressed endpoint and relies on net/http's
// transparent decompression to obtain the plain body.
func main() {
    // http.Get asks for gzip on its own and decodes the reply for us.
    resp, err := http.Get("https://httpbin.org/gzip")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Printf("Response: %s\n", string(body))
    // When the transport decompresses transparently it deletes the
    // Content-Encoding header, so this line prints an empty value.
    fmt.Printf("Content-Encoding: %s\n", resp.Header.Get("Content-Encoding"))
    // Uncompressed is the field that records that decoding took place.
    fmt.Printf("Transparently decompressed: %t\n", resp.Uncompressed)
}

Manual Compression Handling

For more control or to handle additional encodings, implement manual decompression:

package main

import (
    "compress/flate"
    "compress/gzip"
    "fmt"
    "io"
    "net/http"
    "strings"
)

// main requests a deflate-compressed endpoint, decodes the body by hand via
// getDecompressedReader, and prints the result.
func main() {
    url := "https://httpbin.org/deflate"

    // Create request with compression support
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        panic(err)
    }

    // Advertise only the codings this program can actually decode. Brotli
    // ("br") is deliberately left out: nothing here decodes it, so a server
    // that chose it would have its compressed bytes printed as-is.
    // Setting Accept-Encoding explicitly also turns off net/http's
    // transparent gzip handling, which is why the body is decoded manually.
    req.Header.Set("Accept-Encoding", "gzip, deflate")

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    // Get decompressed reader
    reader, err := getDecompressedReader(resp)
    if err != nil {
        panic(err)
    }
    defer reader.Close()

    body, err := io.ReadAll(reader)
    if err != nil {
        panic(err)
    }

    fmt.Printf("Response: %s\n", string(body))
    fmt.Printf("Content-Encoding: %s\n", resp.Header.Get("Content-Encoding"))
}

// getDecompressedReader returns a reader that yields the decoded body of resp
// according to its Content-Encoding header.
//
// gzip and deflate bodies are decoded; for any other value (including a
// missing header) resp.Body is returned unchanged. The decoding readers do not
// close resp.Body when they are closed, so the caller must still close it.
func getDecompressedReader(resp *http.Response) (io.ReadCloser, error) {
    // Content-coding names are case-insensitive, so normalize before matching.
    encoding := strings.ToLower(resp.Header.Get("Content-Encoding"))

    switch {
    case strings.Contains(encoding, "gzip"):
        return gzip.NewReader(resp.Body)
    case strings.Contains(encoding, "deflate"):
        return newDeflateReader(resp.Body)
    default:
        return resp.Body, nil
    }
}

// newDeflateReader decodes an HTTP "deflate" body read from r.
//
// HTTP defines "deflate" as a DEFLATE stream inside a zlib wrapper, but some
// servers send the bare DEFLATE stream instead. The first two bytes are
// inspected: if they form a valid zlib header they are skipped, otherwise they
// are replayed and the data is treated as raw DEFLATE. The zlib Adler-32
// trailer is not verified.
func newDeflateReader(r io.Reader) (io.ReadCloser, error) {
    var hdr [2]byte
    n, err := io.ReadFull(r, hdr[:])
    // A short or empty body is not fatal here; the flate reader reports it.
    if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
        return nil, fmt.Errorf("reading deflate header: %w", err)
    }

    // zlib header: method 8 (deflate), window size nibble <= 7, and the
    // 16-bit big-endian header value is a multiple of 31.
    isZlib := n == 2 &&
        hdr[0]&0x0f == 8 &&
        hdr[0]>>4 <= 7 &&
        (uint16(hdr[0])<<8|uint16(hdr[1]))%31 == 0
    if isZlib {
        if hdr[1]&0x20 != 0 {
            return nil, fmt.Errorf("zlib preset dictionary is not supported")
        }
        return flate.NewReader(r), nil
    }

    // Not zlib-wrapped: put the sniffed bytes back in front of the stream.
    replay := io.MultiReader(strings.NewReader(string(hdr[:n])), r)
    return flate.NewReader(replay), nil
}

Complete Example with Error Handling

Here's a production-ready example with comprehensive error handling:

package main

import (
    "compress/flate"
    "compress/gzip"
    "fmt"
    "io"
    "net/http"
    "strings"
    "time"
)

// CompressionClient wraps an *http.Client and is the receiver for the
// request and decompression helpers defined in this program.
type CompressionClient struct {
    // client is the HTTP client used to send requests.
    client *http.Client
}

// NewCompressionClient builds a CompressionClient whose underlying HTTP
// client uses a 30-second overall request timeout.
func NewCompressionClient() *CompressionClient {
    httpClient := new(http.Client)
    httpClient.Timeout = 30 * time.Second

    cc := new(CompressionClient)
    cc.client = httpClient
    return cc
}

// Get issues a GET request for url, advertising gzip and deflate support, and
// returns the fully decoded response body.
//
// It returns an error if the request cannot be built or sent, if the status
// is anything other than 200 OK, if the Content-Encoding cannot be handled,
// or if reading the body fails. Every error is wrapped with the step that
// failed.
func (c *CompressionClient) Get(url string) ([]byte, error) {
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        return nil, fmt.Errorf("creating request: %w", err)
    }

    // Accept multiple compression formats
    req.Header.Set("Accept-Encoding", "gzip, deflate")
    req.Header.Set("User-Agent", "Go-HTTP-Client/1.1")

    resp, err := c.client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("performing request: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        // resp.Status already carries the numeric code (e.g. "404 Not Found"),
        // so it is not repeated in the message.
        return nil, fmt.Errorf("unexpected status: %s", resp.Status)
    }

    reader, err := c.getReader(resp)
    if err != nil {
        return nil, fmt.Errorf("creating decompressed reader: %w", err)
    }
    // For an unencoded body reader is resp.Body itself, so it is closed twice.
    defer reader.Close()

    body, err := io.ReadAll(reader)
    if err != nil {
        return nil, fmt.Errorf("reading response body: %w", err)
    }

    return body, nil
}

// getReader returns a reader that yields the decoded body of resp according
// to its Content-Encoding header.
//
// gzip and deflate are decoded, a missing header or "identity" returns
// resp.Body unchanged, and any other coding is reported as an error. The
// decoding readers do not close resp.Body when closed; the caller must.
func (c *CompressionClient) getReader(resp *http.Response) (io.ReadCloser, error) {
    // Content-coding names are case-insensitive, so normalize before matching.
    encoding := strings.ToLower(strings.TrimSpace(resp.Header.Get("Content-Encoding")))

    switch {
    case strings.Contains(encoding, "gzip"):
        return gzip.NewReader(resp.Body)
    case strings.Contains(encoding, "deflate"):
        return openDeflateStream(resp.Body)
    case encoding == "", encoding == "identity":
        // No compression
        return resp.Body, nil
    default:
        return nil, fmt.Errorf("unsupported encoding: %s", encoding)
    }
}

// openDeflateStream decodes an HTTP "deflate" body read from r.
//
// HTTP defines "deflate" as a DEFLATE stream inside a zlib wrapper, but some
// servers send the bare DEFLATE stream instead. The first two bytes are
// inspected: if they form a valid zlib header they are skipped, otherwise they
// are replayed and the data is treated as raw DEFLATE. The zlib Adler-32
// trailer is not verified.
func openDeflateStream(r io.Reader) (io.ReadCloser, error) {
    var hdr [2]byte
    n, err := io.ReadFull(r, hdr[:])
    // A short or empty body is not fatal here; the flate reader reports it.
    if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
        return nil, fmt.Errorf("reading deflate header: %w", err)
    }

    // zlib header: method 8 (deflate), window size nibble <= 7, and the
    // 16-bit big-endian header value is a multiple of 31.
    isZlib := n == 2 &&
        hdr[0]&0x0f == 8 &&
        hdr[0]>>4 <= 7 &&
        (uint16(hdr[0])<<8|uint16(hdr[1]))%31 == 0
    if isZlib {
        if hdr[1]&0x20 != 0 {
            return nil, fmt.Errorf("zlib preset dictionary is not supported")
        }
        return flate.NewReader(r), nil
    }

    // Not zlib-wrapped: put the sniffed bytes back in front of the stream.
    replay := io.MultiReader(strings.NewReader(string(hdr[:n])), r)
    return flate.NewReader(replay), nil
}

// main downloads a handful of sample endpoints through a CompressionClient
// and reports the decoded size and a short preview of each body. A failed
// fetch is printed and the loop moves on to the next URL.
func main() {
    fetcher := NewCompressionClient()

    targets := []string{
        "https://httpbin.org/gzip",
        "https://httpbin.org/deflate",
        "https://httpbin.org/json",
    }

    for i := 0; i < len(targets); i++ {
        target := targets[i]
        fmt.Printf("Fetching: %s\n", target)

        payload, fetchErr := fetcher.Get(target)
        if fetchErr != nil {
            fmt.Printf("Error: %v\n", fetchErr)
            continue
        }

        size := len(payload)
        preview := string(payload)
        fmt.Printf("Response size: %d bytes\n", size)
        // The precision in %.100s caps the preview at 100 characters.
        fmt.Printf("First 100 chars: %.100s...\n\n", preview)
    }
}

Key Best Practices

  1. Always close readers with defer: Both response bodies and decompression readers must be closed
  2. Check Content-Encoding: Never assume compression is used; always check the header
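  3. Handle multiple encodings: Support gzip and deflate (which HTTP defines as zlib-wrapped data, although some servers send raw DEFLATE), and optionally Brotli via a third-party package, since the Go standard library has no Brotli decoder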
  4. Use case-insensitive comparisons: Header values may vary in case
  5. Set timeouts: Prevent hanging connections when dealing with compressed streams
  6. Error handling: Wrap errors with context for better debugging

Performance Considerations

  • Automatic vs Manual: Use automatic handling for simple cases, manual for custom requirements
  • Memory usage: Compressed responses use less bandwidth but more CPU and memory during decompression
  • Streaming: For large responses, consider streaming decompression instead of loading everything into memory

Common Gotchas

  1. Double decompression: Don't manually decompress when using http.Get() - gzip is decoded automatically unless you set the Accept-Encoding header yourself
  2. Empty responses: Some servers send compressed empty responses that can cause reader errors
  3. Partial content: Range requests with compression can be tricky
  4. Transfer-Encoding vs Content-Encoding: These are different - Transfer-Encoding is hop-by-hop

This approach ensures your Go applications can efficiently handle compressed HTTP responses while maintaining robust error handling and performance.

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon