How do I handle cookies and sessions in Swift web scraping?

Handling cookies and sessions is crucial in Swift web scraping: it lets your scraper maintain state across HTTP requests, just as a browser does. That state is what preserves login sessions, carries session-specific data, and satisfies CSRF tokens and other security mechanisms that rely on cookies.

Swift's URLSession provides robust cookie and session management capabilities. Here's a comprehensive guide to implementing cookie handling in your Swift web scraping projects.

Basic URLSession Configuration for Cookie Handling

Setting Up Cookie-Enabled URLSession

import Foundation

class WebScraper {
    private let session: URLSession

    init() {
        let config = URLSessionConfiguration.default
        config.httpCookieAcceptPolicy = .always
        config.httpShouldSetCookies = true
        config.httpCookieStorage = HTTPCookieStorage.shared

        // Optional: Set custom user agent
        config.httpAdditionalHeaders = ["User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"]

        self.session = URLSession(configuration: config)
    }
}
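
With this configuration, any Set-Cookie headers the server returns are stored automatically and replayed on later requests to the same domain. As a minimal sketch, a hypothetical fetchHTML helper (not part of the class above) exercises the session:

extension WebScraper {
    // Hypothetical helper: cookies set by the server are stored
    // automatically and re-sent on subsequent requests
    func fetchHTML(from urlString: String) async throws -> String {
        guard let url = URL(string: urlString) else {
            throw URLError(.badURL)
        }
        let (data, _) = try await session.data(from: url)
        return String(data: data, encoding: .utf8) ?? ""
    }
}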

Different Cookie Storage Options

// Assign the storage before creating the URLSession; the configuration
// is copied at session creation, so later changes have no effect

// Shared storage (the default): cookies persist across URLSession instances
config.httpCookieStorage = HTTPCookieStorage.shared

// Isolated storage for a specific scraping session
config.httpCookieStorage = HTTPCookieStorage()

// No cookie storage (disables cookies entirely)
config.httpCookieStorage = nil
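
Beyond these three options, an ephemeral configuration keeps cookies (along with caches and credentials) purely in memory, which is handy for throwaway scraping runs; a brief sketch:

// Ephemeral configurations store session data in RAM only;
// everything is discarded once the session is invalidated
let ephemeralConfig = URLSessionConfiguration.ephemeral
let throwawaySession = URLSession(configuration: ephemeralConfig)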

Login and Authentication with Sessions

Modern Async/Await Login Implementation

import Foundation

extension WebScraper {
    func login(username: String, password: String) async throws -> Bool {
        guard let url = URL(string: "https://example.com/login") else {
            throw ScrapingError.invalidURL
        }

        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/x-www-form-urlencoded", forHTTPHeaderField: "Content-Type")

        let loginData = "username=\(username)&password=\(password)"
        request.httpBody = loginData.data(using: .utf8)

        let (data, response) = try await session.data(for: request)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw ScrapingError.invalidResponse
        }

        // Check for a successful response; note that some sites return 200
        // even for failed logins, so verifying a session cookie is more reliable
        let isSuccess = 200...299 ~= httpResponse.statusCode

        if isSuccess {
            // Print received cookies for debugging
            printCookies(for: url)
        }

        return isSuccess
    }

    private func printCookies(for url: URL) {
        if let cookies = HTTPCookieStorage.shared.cookies(for: url) {
            print("Received cookies:")
            for cookie in cookies {
                print("  \(cookie.name) = \(cookie.value) (expires: \(cookie.expiresDate?.description ?? "session"))")
            }
        }
    }
}
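
A quick usage sketch; the credentials are placeholders, and the call needs an async context such as a Task:

Task {
    let scraper = WebScraper()
    do {
        let loggedIn = try await scraper.login(username: "user", password: "secret")
        print(loggedIn ? "Logged in" : "Login rejected")
    } catch {
        print("Login error: \(error)")
    }
}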

Handling CSRF Tokens

extension WebScraper {
    func getCSRFToken(from url: URL) async throws -> String? {
        let (data, _) = try await session.data(from: url)

        guard let html = String(data: data, encoding: .utf8) else {
            return nil
        }

        // Extract CSRF token using regex or string parsing
        let pattern = #"<meta name="csrf-token" content="([^"]+)""#
        let regex = try NSRegularExpression(pattern: pattern)
        let range = NSRange(html.startIndex..., in: html)

        if let match = regex.firstMatch(in: html, options: [], range: range),
           let tokenRange = Range(match.range(at: 1), in: html) {
            return String(html[tokenRange])
        }

        return nil
    }

    func loginWithCSRF(username: String, password: String) async throws -> Bool {
        // First, get the login page to extract CSRF token
        guard let loginPageURL = URL(string: "https://example.com/login") else {
            throw ScrapingError.invalidURL
        }

        let csrfToken = try await getCSRFToken(from: loginPageURL)

        var request = URLRequest(url: loginPageURL)
        request.httpMethod = "POST"
        request.setValue("application/x-www-form-urlencoded", forHTTPHeaderField: "Content-Type")

        var loginData = "username=\(username)&password=\(password)"
        if let token = csrfToken {
            loginData += "&_token=\(token)"
        }

        request.httpBody = loginData.data(using: .utf8)

        let (_, response) = try await session.data(for: request)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw ScrapingError.invalidResponse
        }

        return 200...299 ~= httpResponse.statusCode
    }
}
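
Not every site exposes the token in a meta tag; many embed it in a hidden form field instead. A variant pattern for that case (the _token field name is an assumption, so match it to the target site's markup):

// Matches <input type="hidden" name="_token" value="...">
let inputPattern = #"name="_token"\s+value="([^"]+)""#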

Advanced Cookie Management

Custom Cookie Creation and Manipulation

extension WebScraper {
    func setCookie(name: String, value: String, domain: String, path: String = "/") {
        // Note: HTTPCookiePropertyKey has no public .httpOnly key, and
        // .secure expects a string value ("TRUE") rather than a Bool
        let cookie = HTTPCookie(properties: [
            .name: name,
            .value: value,
            .domain: domain,
            .path: path,
            .secure: "TRUE"
        ])

        if let cookie = cookie {
            HTTPCookieStorage.shared.setCookie(cookie)
        }
    }

    func getCookieValue(name: String, for url: URL) -> String? {
        guard let cookies = HTTPCookieStorage.shared.cookies(for: url) else {
            return nil
        }

        return cookies.first { $0.name == name }?.value
    }

    func deleteCookie(name: String, for url: URL) {
        guard let cookies = HTTPCookieStorage.shared.cookies(for: url),
              let cookie = cookies.first(where: { $0.name == name }) else {
            return
        }

        HTTPCookieStorage.shared.deleteCookie(cookie)
    }

    func clearAllCookies() {
        HTTPCookieStorage.shared.removeCookies(since: Date.distantPast)
    }
}
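
Building on these helpers, a small hypothetical check can tell you whether a stored session is still usable before you start scraping (see also tip 3 under Best Practices below):

extension WebScraper {
    // Returns true if the named cookie is missing or past its expiry date,
    // signalling that a fresh login is required
    func cookieNeedsRefresh(name: String, for url: URL) -> Bool {
        guard let cookie = HTTPCookieStorage.shared.cookies(for: url)?
            .first(where: { $0.name == name }) else {
            return true
        }
        if let expiry = cookie.expiresDate {
            return expiry <= Date()
        }
        return false // session cookie: lives until the storage is cleared
    }
}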

Secure Cookie Persistence

Modern Secure Persistence Implementation

import Foundation
import Security

class CookieManager {
    private let serviceIdentifier = "com.yourapp.webscraper.cookies"

    func saveCookies() throws {
        guard let cookies = HTTPCookieStorage.shared.cookies else { return }

        // HTTPCookie does not adopt NSSecureCoding, so archive each cookie's
        // properties dictionary (string keys, plist-type values) instead
        let cookieProperties = cookies.compactMap { cookie -> [String: Any]? in
            guard let props = cookie.properties else { return nil }
            return Dictionary(uniqueKeysWithValues: props.map { ($0.key.rawValue, $0.value) })
        }

        let cookiesData = try NSKeyedArchiver.archivedData(
            withRootObject: cookieProperties,
            requiringSecureCoding: true
        )

        try saveToKeychain(data: cookiesData, identifier: serviceIdentifier)
    }

    func loadCookies() throws {
        let cookiesData = try loadFromKeychain(identifier: serviceIdentifier)

        guard let rawProperties = try NSKeyedUnarchiver.unarchivedObject(
            ofClasses: [NSArray.self, NSDictionary.self, NSString.self,
                        NSNumber.self, NSDate.self, NSURL.self],
            from: cookiesData
        ) as? [[String: Any]] else {
            throw CookieError.invalidData
        }

        // Rebuild each cookie from its archived properties
        for raw in rawProperties {
            let properties = Dictionary(uniqueKeysWithValues:
                raw.map { (HTTPCookiePropertyKey($0.key), $0.value) })
            if let cookie = HTTPCookie(properties: properties) {
                HTTPCookieStorage.shared.setCookie(cookie)
            }
        }
    }

    private func saveToKeychain(data: Data, identifier: String) throws {
        let query: [String: Any] = [
            kSecClass as String: kSecClassGenericPassword,
            kSecAttrService as String: identifier,
            kSecValueData as String: data
        ]

        SecItemDelete(query as CFDictionary)

        let status = SecItemAdd(query as CFDictionary, nil)
        guard status == errSecSuccess else {
            throw CookieError.keychainError(status)
        }
    }

    private func loadFromKeychain(identifier: String) throws -> Data {
        let query: [String: Any] = [
            kSecClass as String: kSecClassGenericPassword,
            kSecAttrService as String: identifier,
            kSecReturnData as String: true,
            kSecMatchLimit as String: kSecMatchLimitOne
        ]

        var result: AnyObject?
        let status = SecItemCopyMatching(query as CFDictionary, &result)

        guard status == errSecSuccess,
              let data = result as? Data else {
            throw CookieError.keychainError(status)
        }

        return data
    }
}
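
Typical usage is to restore cookies at the start of a run and save them when the run finishes:

let manager = CookieManager()

// At the start of a run, before making requests
try? manager.loadCookies()

// ... perform scraping ...

// At the end of the run, persist the session for next time
try? manager.saveCookies()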

Complete Web Scraping Session Example

Full Implementation with Error Handling

import Foundation

class WebScrapingSession {
    private let session: URLSession
    private let cookieManager = CookieManager()

    init() {
        let config = URLSessionConfiguration.default
        config.httpCookieAcceptPolicy = .always
        config.httpShouldSetCookies = true
        config.timeoutIntervalForRequest = 30
        config.timeoutIntervalForResource = 60

        self.session = URLSession(configuration: config)
    }

    func startScraping() async throws {
        // Load previously saved cookies
        try? cookieManager.loadCookies()

        // Login
        let loginSuccess = try await login(username: "your_username", password: "your_password")
        guard loginSuccess else {
            throw ScrapingError.loginFailed
        }

        // Scrape protected content
        let protectedData = try await scrapeProtectedPage()

        // Process data
        print("Scraped data: \(protectedData)")

        // Save cookies for next session
        try cookieManager.saveCookies()
    }

    private func login(username: String, password: String) async throws -> Bool {
        guard let url = URL(string: "https://example.com/login") else {
            throw ScrapingError.invalidURL
        }

        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/x-www-form-urlencoded", forHTTPHeaderField: "Content-Type")

        let loginData = "username=\(username)&password=\(password)"
        request.httpBody = loginData.data(using: .utf8)

        let (_, response) = try await session.data(for: request)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw ScrapingError.invalidResponse
        }

        return 200...299 ~= httpResponse.statusCode
    }

    private func scrapeProtectedPage() async throws -> String {
        guard let url = URL(string: "https://example.com/protected-content") else {
            throw ScrapingError.invalidURL
        }

        let (data, response) = try await session.data(from: url)

        guard let httpResponse = response as? HTTPURLResponse,
              200...299 ~= httpResponse.statusCode else {
            throw ScrapingError.accessDenied
        }

        return String(data: data, encoding: .utf8) ?? ""
    }
}

// Error definitions
enum ScrapingError: Error, LocalizedError {
    case invalidURL
    case invalidResponse
    case loginFailed
    case accessDenied

    var errorDescription: String? {
        switch self {
        case .invalidURL:
            return "Invalid URL provided"
        case .invalidResponse:
            return "Invalid response received"
        case .loginFailed:
            return "Login authentication failed"
        case .accessDenied:
            return "Access denied to protected resource"
        }
    }
}

enum CookieError: Error {
    case invalidData
    case keychainError(OSStatus)
}
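
To drive the whole flow from a command-line tool, an async entry point works well; a sketch (the credentials inside startScraping are placeholders):

@main
struct ScraperApp {
    static func main() async {
        let scrapingSession = WebScrapingSession()
        do {
            try await scrapingSession.startScraping()
        } catch {
            print("Scraping failed: \(error.localizedDescription)")
        }
    }
}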

Best Practices and Considerations

Session Management Tips

  1. Use dedicated URLSession instances for different scraping tasks to avoid cookie conflicts
  2. Implement proper timeout handling to prevent hanging requests
  3. Handle cookie expiration by checking expiresDate property
  4. Respect robots.txt and implement rate limiting (see the sketch after this list)
  5. Use HTTPS whenever possible for secure cookie transmission
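
A fixed delay between requests is the simplest form of rate limiting; a minimal sketch (the 2-second interval is an arbitrary choice, tune it per site):

// Fetch URLs sequentially with a politeness delay between requests
func politeFetch(_ urls: [URL], using session: URLSession,
                 delaySeconds: Double = 2.0) async throws -> [Data] {
    var results: [Data] = []
    for url in urls {
        let (data, _) = try await session.data(from: url)
        results.append(data)
        try await Task.sleep(nanoseconds: UInt64(delaySeconds * 1_000_000_000))
    }
    return results
}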

Cookie Security

  • Store sensitive cookies in Keychain rather than UserDefaults
  • Implement secure coding practices when archiving/unarchiving cookies
  • Clear cookies when appropriate to maintain privacy
  • Be aware of SameSite and Secure cookie attributes (inspectable as shown below)
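
These attributes can be inspected directly on stored cookies; a short sketch (sameSitePolicy requires iOS 13 / macOS 10.15 or later):

// Print security-related attributes for every stored cookie
for cookie in HTTPCookieStorage.shared.cookies ?? [] {
    print(cookie.name,
          "secure:", cookie.isSecure,
          "httpOnly:", cookie.isHTTPOnly,
          "sameSite:", cookie.sameSitePolicy?.rawValue ?? "unspecified")
}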

Legal and Ethical Considerations

Always ensure your web scraping activities comply with:

  • Website terms of service and robots.txt
  • Data protection regulations (GDPR, CCPA, etc.)
  • Rate limiting and respectful scraping practices
  • Copyright and intellectual property laws

This comprehensive approach to cookie and session management in Swift provides a robust foundation for your web scraping projects while maintaining security and compliance standards.

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"
