SwiftSoup is a pure Swift HTML parsing library that provides a convenient API for extracting and manipulating data using DOM, CSS, and jQuery-like methods. However, SwiftSoup only parses HTML content - it doesn't handle HTTP requests or cookie management. For network operations with cookies, you need to use URLSession
alongside SwiftSoup.
Understanding the SwiftSoup + URLSession Workflow
The typical workflow combines these technologies:
1. URLSession - Makes HTTP requests and manages cookies
2. SwiftSoup - Parses the received HTML content
Setting Cookies
Basic Cookie Creation and Storage
import Foundation
import SwiftSoup
// Create a URL and bind its host up front; the host becomes the cookie's domain,
// so a URL without one is rejected here instead of crashing on a force-unwrap.
guard let url = URL(string: "https://example.com"), let host = url.host else { return }
// HTTPCookiePropertyKey exposes no public `.httpOnly` constant, so the key is
// built from the raw attribute name.
// NOTE(review): "HttpOnly" is not a documented property key — confirm that the
// resulting cookie reports `isHTTPOnly == true` on your deployment target.
let httpOnlyKey = HTTPCookiePropertyKey(rawValue: "HttpOnly")
// Define cookie properties
let cookieProperties: [HTTPCookiePropertyKey: Any] = [
    .domain: host,
    .path: "/",
    .name: "sessionId",
    .value: "abc123xyz",
    .secure: "TRUE",
    httpOnlyKey: "TRUE",
    .expires: Date(timeIntervalSinceNow: 3600) // 1 hour from now
]
// Create and store the cookie; the initializer is failable, so report a nil result
if let cookie = HTTPCookie(properties: cookieProperties) {
    HTTPCookieStorage.shared.setCookie(cookie)
    print("Cookie set: \(cookie.name)=\(cookie.value)")
} else {
    print("Failed to create cookie from properties")
}
Setting Multiple Cookies
// Cookies to install, as typed name/value pairs so that no dictionary lookup
// (and therefore no force-unwrap) is needed when building the properties.
let cookies: [(name: String, value: String)] = [
    (name: "auth_token", value: "token123"),
    (name: "user_pref", value: "dark_mode"),
    (name: "session_lang", value: "en")
]
// `url` is the target URL defined by the surrounding code; a URL without a host
// cannot scope a cookie, so bail out instead of force-unwrapping.
guard let cookieDomain = url.host else { return }
for cookieData in cookies {
    let properties: [HTTPCookiePropertyKey: Any] = [
        .domain: cookieDomain,
        .path: "/",
        .name: cookieData.name,
        .value: cookieData.value,
        .secure: "TRUE"
    ]
    // The initializer is failable; entries it rejects are skipped.
    if let cookie = HTTPCookie(properties: properties) {
        HTTPCookieStorage.shared.setCookie(cookie)
    }
}
Complete Example: HTTP Request with Cookie and SwiftSoup Parsing
import Foundation
import SwiftSoup
/// Fetches a protected page with an authentication cookie set and prints the
/// page title plus the text and `href` of every link in the returned HTML.
///
/// The cookie is stored in `HTTPCookieStorage.shared` before the request is
/// started on `URLSession.shared`. All failures (bad URL, cookie creation,
/// network, HTTP status, decoding, parsing) are reported with `print` and end
/// the operation; nothing is returned to the caller.
func scrapeWithCookies() {
    // Bind the host together with the URL so the cookie domain never needs a force-unwrap.
    guard let url = URL(string: "https://example.com/protected-page"),
          let host = url.host else { return }

    // Set authentication cookie
    let cookieProperties: [HTTPCookiePropertyKey: Any] = [
        .domain: host,
        .path: "/",
        .name: "auth_token",
        .value: "your_auth_token_here",
        .secure: "TRUE"
    ]
    guard let cookie = HTTPCookie(properties: cookieProperties) else {
        // Without the cookie the request would go out unauthenticated, so stop here.
        print("Failed to create auth cookie")
        return
    }
    HTTPCookieStorage.shared.setCookie(cookie)

    // Make request with cookies
    let task = URLSession.shared.dataTask(with: url) { data, response, error in
        guard let data = data, error == nil else {
            print("Network error: \(error?.localizedDescription ?? "Unknown")")
            return
        }

        // A non-2xx body is typically an error or login page, not the content we want to scrape.
        if let httpResponse = response as? HTTPURLResponse,
           !(200..<300).contains(httpResponse.statusCode) {
            print("HTTP error: \(httpResponse.statusCode)")
            return
        }

        // Convert to string
        guard let htmlContent = String(data: data, encoding: .utf8) else {
            print("Failed to decode HTML")
            return
        }

        // Parse with SwiftSoup
        do {
            let doc = try SwiftSoup.parse(htmlContent)
            let title = try doc.title()
            let links = try doc.select("a[href]")
            print("Page title: \(title)")
            print("Found \(links.count) links")

            // Extract specific data
            for link in links {
                let href = try link.attr("href")
                let text = try link.text()
                print("Link: \(text) -> \(href)")
            }
        } catch {
            print("SwiftSoup parsing error: \(error)")
        }
    }
    task.resume()
}
Removing Cookies
Remove Specific Cookie
// Method 1: Build a cookie with the same name, domain and path, then delete it
let removalProperties: [HTTPCookiePropertyKey: Any] = [
    .name: "sessionId",
    .value: "", // Value doesn't matter for deletion
    .domain: "example.com",
    .path: "/"
]
if let placeholderCookie = HTTPCookie(properties: removalProperties) {
    HTTPCookieStorage.shared.deleteCookie(placeholderCookie)
    print("Cookie removed: \(placeholderCookie.name)")
}
// Method 2: Look up the first stored cookie that matches and delete that one
let storedMatch = HTTPCookieStorage.shared.cookies?.first(where: { candidate in
    candidate.name == "sessionId" && candidate.domain == "example.com"
})
if let storedMatch = storedMatch {
    HTTPCookieStorage.shared.deleteCookie(storedMatch)
    print("Found and removed cookie: \(storedMatch.name)")
}
Remove All Cookies for a Domain
// Delete every cookie the shared store would send to this URL.
guard let url = URL(string: "https://example.com") else { return }
if let cookies = HTTPCookieStorage.shared.cookies(for: url) {
    // Fall back to the full URL string rather than force-unwrapping an absent host.
    print("Removing \(cookies.count) cookies for \(url.host ?? url.absoluteString)")
    for cookie in cookies {
        HTTPCookieStorage.shared.deleteCookie(cookie)
        print("Removed: \(cookie.name)")
    }
}
Remove All Cookies
// ⚠️ Use with caution - this wipes every cookie in the shared store, whatever its domain
let sharedStore = HTTPCookieStorage.shared
if let storedCookies = sharedStore.cookies {
    storedCookies.forEach { sharedStore.deleteCookie($0) }
    print("All cookies removed")
}
Advanced Cookie Management
Custom URLSession Configuration
// Create custom session configuration
let config = URLSessionConfiguration.default
config.httpCookieAcceptPolicy = .always
config.httpShouldSetCookies = true
// Create custom cookie storage.
// A bare `HTTPCookieStorage()` is not one of the documented ways to obtain a
// cookie store, so a named container store is requested instead; it is a
// different object from `HTTPCookieStorage.shared`.
// NOTE(review): the identifier below is a placeholder — replace it with your
// own app-group identifier and confirm the isolation behaviour on your target.
let customCookieStorage = HTTPCookieStorage.sharedCookieStorage(
    forGroupContainerIdentifier: "group.com.example.scraper"
)
config.httpCookieStorage = customCookieStorage
let customSession = URLSession(configuration: config)
// Now use customSession for requests with isolated cookie storage
// (`url` is the target URL defined by the surrounding code)
let task = customSession.dataTask(with: url) { data, response, error in
    // Handle response and parse with SwiftSoup
}
task.resume()
Ephemeral Session (No Cookie Storage)
// Session that neither sends nor stores cookies
let ephemeralConfig = URLSessionConfiguration.ephemeral
// Do not attach cookies from the cookie store to outgoing requests.
ephemeralConfig.httpShouldSetCookies = false
// Do not accept cookies delivered by responses.
ephemeralConfig.httpCookieAcceptPolicy = .never
// Remove the cookie store altogether; setting it to nil disables cookie storage.
ephemeralConfig.httpCookieStorage = nil
let ephemeralSession = URLSession(configuration: ephemeralConfig)
// `url` is the target URL defined by the surrounding code
let task = ephemeralSession.dataTask(with: url) { data, response, error in
    // This request won't send or store any cookies
    if let data = data, let html = String(data: data, encoding: .utf8) {
        // Parse with SwiftSoup
        do {
            let doc = try SwiftSoup.parse(html)
            // Process document...
        } catch {
            print("Parsing error: \(error)")
        }
    }
}
task.resume()
Extracting Cookies from Response
// `url` is the target URL defined by the surrounding code
let task = URLSession.shared.dataTask(with: url) { data, response, error in
    // Extract cookies from response
    if let httpResponse = response as? HTTPURLResponse,
       let responseURL = httpResponse.url {
        // `allHeaderFields` is `[AnyHashable: Any]`; copy only the String pairs
        // instead of force-casting the whole dictionary, which would crash if
        // any key or value were not a String.
        var headerFields: [String: String] = [:]
        for (key, value) in httpResponse.allHeaderFields {
            if let name = key as? String, let text = value as? String {
                headerFields[name] = text
            }
        }
        let cookies = HTTPCookie.cookies(withResponseHeaderFields: headerFields, for: responseURL)
        print("Response set \(cookies.count) cookies:")
        for cookie in cookies {
            print("- \(cookie.name)=\(cookie.value)")
            // Store cookies manually if needed
            HTTPCookieStorage.shared.setCookie(cookie)
        }
    }
    // Parse HTML with SwiftSoup
    if let data = data, let html = String(data: data, encoding: .utf8) {
        do {
            let doc = try SwiftSoup.parse(html)
            // Process parsed content...
        } catch {
            print("SwiftSoup error: \(error)")
        }
    }
}
task.resume()
Best Practices
- Cookie Persistence: HTTPCookieStorage.shared persists cookies across app sessions (session-only cookies, which have no expiration date, are discarded)
- Domain Matching: Ensure cookie domains match your target URLs
- Security: Use the secure and httpOnly flags for sensitive cookies
- Error Handling: Always handle potential failures in cookie creation and network requests
- Memory Management: Consider using ephemeral sessions for temporary scraping tasks
- Debugging: Print cookie properties during development to verify correct setup
Key Takeaways
- SwiftSoup: HTML parsing only
- URLSession: HTTP requests and cookie management
- HTTPCookieStorage: Persistent cookie storage across requests
- Custom configurations: Control cookie behavior per session
- Integration: Combine both libraries for complete web scraping solutions