How do I handle web scraping analytics and monitoring in Swift apps?
Implementing comprehensive analytics and monitoring in Swift web scraping applications is crucial for maintaining reliability, tracking performance, and identifying issues before they impact your users. This guide covers essential monitoring techniques, performance tracking, error handling, and logging strategies specifically tailored for Swift-based scraping applications.
Core Analytics Components
1. Performance Metrics Tracking
Start by implementing a performance monitoring system that tracks key metrics like request duration, success rates, and data throughput:
import Foundation
/// Thread-safe collector for aggregate scraping metrics.
///
/// All mutable counters are confined to a private serial queue. Writes are
/// enqueued asynchronously by `trackRequest(_:)`; the read accessors hop onto
/// the same queue synchronously, so they observe every request that was
/// tracked before the accessor was called.
class ScrapingAnalytics {
    // Running totals. Only ever read or written on `queue`.
    private var requestCount = 0
    private var successCount = 0
    private var errorCount = 0
    private var totalDuration: TimeInterval = 0
    private var dataTransferred = 0

    private let queue = DispatchQueue(label: "analytics.queue", qos: .background)

    /// A single completed (or failed) request, as observed by the caller.
    struct RequestMetrics {
        let url: String
        let duration: TimeInterval
        let statusCode: Int?
        let dataSize: Int
        let timestamp: Date
        let success: Bool
    }

    /// Records one request. Returns immediately; the counters are updated
    /// asynchronously on the analytics queue.
    /// - Parameter metrics: The measurements for the finished request.
    func trackRequest(_ metrics: RequestMetrics) {
        queue.async {
            self.requestCount += 1
            self.totalDuration += metrics.duration
            self.dataTransferred += metrics.dataSize
            if metrics.success {
                self.successCount += 1
            } else {
                self.errorCount += 1
            }
            self.logRequest(metrics)
        }
    }

    /// Writes a one-line summary of the request to standard output.
    private func logRequest(_ metrics: RequestMetrics) {
        let status = metrics.statusCode.map { String($0) } ?? "none"
        print("REQUEST: url=\(metrics.url) status=\(status) duration=\(metrics.duration)s bytes=\(metrics.dataSize) success=\(metrics.success)")
    }

    /// Mean request duration in seconds over all tracked requests.
    /// - Returns: `0` when no request has been tracked yet.
    func getAverageResponseTime() -> Double {
        return queue.sync {
            // Guard the division so an empty collector yields 0 rather than NaN.
            requestCount > 0 ? totalDuration / Double(requestCount) : 0
        }
    }

    /// Percentage (0...100) of tracked requests that were marked successful.
    /// - Returns: `0` when no request has been tracked yet.
    func getSuccessRate() -> Double {
        return queue.sync {
            requestCount > 0 ? Double(successCount) / Double(requestCount) * 100 : 0
        }
    }
}
2. Custom URL Session with Analytics
Create a custom URLSession wrapper that automatically tracks all network requests:
/// `URLSession` wrapper that reports a `ScrapingAnalytics.RequestMetrics`
/// entry for every data task it creates.
class AnalyticsURLSession {
    private let session: URLSession
    private let analytics: ScrapingAnalytics

    /// - Parameters:
    ///   - configuration: Configuration for the underlying `URLSession`.
    ///   - analytics: Collector that receives one entry per completed task.
    init(configuration: URLSessionConfiguration = .default, analytics: ScrapingAnalytics) {
        self.session = URLSession(configuration: configuration)
        self.analytics = analytics
    }

    /// Creates a data task whose completion is measured and reported before
    /// `completionHandler` is invoked. The task is returned suspended; the
    /// caller must call `resume()`.
    /// - Parameters:
    ///   - request: The request to perform.
    ///   - completionHandler: Receives the untouched `URLSession` result.
    /// - Returns: The suspended data task.
    func dataTask(with request: URLRequest, completionHandler: @escaping (Data?, URLResponse?, Error?) -> Void) -> URLSessionDataTask {
        let startTime = Date()
        // Capture the collector itself instead of `[weak self]`: the wrapper is
        // often a short-lived local, and a weak capture would silently drop the
        // metrics if it is released before the response arrives.
        let analytics = self.analytics
        return session.dataTask(with: request) { data, response, error in
            let duration = Date().timeIntervalSince(startTime)
            let statusCode = (response as? HTTPURLResponse)?.statusCode
            // Any 2xx status is a success (201, 204, 206, ...), not just 200.
            let hasSuccessStatus = statusCode.map { (200..<300).contains($0) } ?? false
            let metrics = ScrapingAnalytics.RequestMetrics(
                url: request.url?.absoluteString ?? "unknown",
                duration: duration,
                statusCode: statusCode,
                dataSize: data?.count ?? 0,
                timestamp: startTime,
                success: error == nil && hasSuccessStatus
            )
            analytics.trackRequest(metrics)
            completionHandler(data, response, error)
        }
    }
}
Error Tracking and Alerting
1. Comprehensive Error Monitoring
Implement a robust error tracking system that categorizes and reports different types of failures:
/// Failure categories reported by the scraping pipeline.
enum ScrapingError: Error {
    // Categories that carry no additional detail.
    case networkTimeout, invalidResponse, parsingError, rateLimitExceeded, authenticationFailed

    /// Server-side failure. The associated value is presumably the HTTP
    /// status code — confirm against the code that constructs this case.
    case serverError(Int)

    /// Any other failure, wrapping the underlying error.
    case unknown(Error)
}
/// Counts scraping errors per category and raises an alert when a category
/// keeps recurring.
///
/// `trackError` may be called from arbitrary threads (for example from
/// `URLSession` completion handlers), so the counter table is guarded by a lock.
class ErrorTracker {
    private var errorCounts: [String: Int] = [:]
    private let alertThreshold: Int = 10
    private let lock = NSLock()

    /// Records one occurrence of `error`, logs it, and alerts if warranted.
    ///
    /// An alert is sent each time the count for this error's description
    /// reaches a multiple of the threshold (10, 20, 30, ...). This avoids
    /// sending an alert for every occurrence once the threshold is passed.
    /// - Parameters:
    ///   - error: The error to record. Its `String(describing:)` form is the
    ///     counter key, so differing associated values are counted separately.
    ///   - context: Extra key/value pairs included in the log line.
    func trackError(_ error: ScrapingError, context: [String: Any] = [:]) {
        let errorKey = String(describing: error)
        let count = incrementCount(for: errorKey)

        // Log error with context
        logError(error, context: context)

        // Check if we should send an alert
        if count % alertThreshold == 0 {
            sendAlert(for: error, count: count)
        }
    }

    /// Atomically increments and returns the counter stored under `key`.
    private func incrementCount(for key: String) -> Int {
        lock.lock()
        defer { lock.unlock() }
        let updated = (errorCounts[key] ?? 0) + 1
        errorCounts[key] = updated
        return updated
    }

    /// Prints the error, an ISO 8601 timestamp, and the caller's context.
    private func logError(_ error: ScrapingError, context: [String: Any]) {
        let logData: [String: Any] = [
            "error": String(describing: error),
            "timestamp": ISO8601DateFormatter().string(from: Date()),
            "context": context
        ]
        print("ERROR: \(logData)")
        // Send to your logging service
    }

    /// Prints an alert line; replace with real alerting.
    private func sendAlert(for error: ScrapingError, count: Int) {
        // Implement your alerting logic here
        print("ALERT: Error \(error) occurred \(count) times")
    }
}
2. Health Check System
Implement periodic health checks to monitor the overall system status:
/// Periodically samples `ScrapingAnalytics` and prints warnings when the
/// success rate or average response time crosses a fixed threshold.
class HealthMonitor {
    private let analytics: ScrapingAnalytics
    private let errorTracker: ErrorTracker
    // A dispatch timer is used instead of `Timer` because it does not need a
    // running run loop on the thread that creates the monitor, and it can be
    // cancelled safely from any thread in `deinit`.
    private var healthCheckTimer: DispatchSourceTimer?
    /// Seconds between health checks.
    private let healthCheckInterval: TimeInterval = 300

    /// Creates the monitor and starts the periodic checks immediately.
    /// - Parameters:
    ///   - analytics: Source of the success-rate and response-time figures.
    ///   - errorTracker: Retained for future use; not consulted by the checks.
    init(analytics: ScrapingAnalytics, errorTracker: ErrorTracker) {
        self.analytics = analytics
        self.errorTracker = errorTracker
        startHealthChecks()
    }

    deinit {
        // Stop the repeating timer so it does not outlive the monitor.
        healthCheckTimer?.cancel()
    }

    /// Schedules `performHealthCheck` every `healthCheckInterval` seconds on a
    /// utility-priority global queue. The first check runs one interval from now.
    private func startHealthChecks() {
        let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global(qos: .utility))
        timer.schedule(deadline: .now() + healthCheckInterval, repeating: healthCheckInterval)
        timer.setEventHandler { [weak self] in
            self?.performHealthCheck()
        }
        timer.resume()
        healthCheckTimer = timer
    }

    /// Takes one sample, prints threshold warnings, and logs the sample.
    private func performHealthCheck() {
        let successRate = analytics.getSuccessRate()
        let avgResponseTime = analytics.getAverageResponseTime()
        let healthStatus = HealthStatus(
            successRate: successRate,
            averageResponseTime: avgResponseTime,
            timestamp: Date()
        )
        if successRate < 90 {
            print("WARNING: Success rate below threshold: \(successRate)%")
        }
        if avgResponseTime > 5.0 {
            print("WARNING: Average response time high: \(avgResponseTime)s")
        }
        logHealthStatus(healthStatus)
    }

    /// One health sample.
    struct HealthStatus {
        let successRate: Double
        let averageResponseTime: Double
        let timestamp: Date
    }

    /// Prints the sample with an ISO 8601 timestamp.
    private func logHealthStatus(_ status: HealthStatus) {
        let logData: [String: Any] = [
            "success_rate": status.successRate,
            "avg_response_time": status.averageResponseTime,
            "timestamp": ISO8601DateFormatter().string(from: status.timestamp)
        ]
        print("HEALTH: \(logData)")
    }
}
Logging and Data Collection
1. Structured Logging
Implement structured logging for better analysis and debugging:
/// Severity of a log entry. The raw value is the label written to the log.
enum LogLevel: String {
    case debug = "DEBUG", info = "INFO", warning = "WARNING", error = "ERROR"
}
/// Structured logger that serialises each entry to JSON on a private
/// serial queue so callers are never blocked by formatting or output.
class ScrapingLogger {
    private let logQueue = DispatchQueue(label: "logging.queue", qos: .utility)
    // Reused for every entry; only ever touched on `logQueue`.
    private let timestampFormatter = ISO8601DateFormatter()

    /// Enqueues a log entry.
    /// - Parameters:
    ///   - level: Severity label for the entry.
    ///   - message: Human-readable message.
    ///   - data: Extra fields merged into the entry. A key that collides with
    ///     `level`, `message`, `timestamp` or `thread` replaces the built-in value.
    func log(_ level: LogLevel, message: String, data: [String: Any] = [:]) {
        // Capture the time and thread here, at the call site. Inside the async
        // block they would describe the logging queue, not the event.
        let timestamp = Date()
        let threadName = Thread.current.name.flatMap { $0.isEmpty ? nil : $0 } ?? "unknown"
        logQueue.async {
            let logEntry = self.createLogEntry(
                level: level,
                message: message,
                data: data,
                timestamp: timestamp,
                threadName: threadName
            )
            self.writeLog(logEntry)
        }
    }

    /// Builds the dictionary for one entry, with caller data merged last.
    private func createLogEntry(level: LogLevel, message: String, data: [String: Any], timestamp: Date, threadName: String) -> [String: Any] {
        let baseEntry: [String: Any] = [
            "level": level.rawValue,
            "message": message,
            "timestamp": timestampFormatter.string(from: timestamp),
            "thread": threadName
        ]
        // Merge additional data
        return baseEntry.merging(data) { _, callerValue in callerValue }
    }

    /// Serialises the entry to pretty-printed JSON and prints it.
    private func writeLog(_ entry: [String: Any]) {
        // JSONSerialization raises an uncatchable exception for values it cannot
        // encode (Date, URL, NaN, custom types, ...). Replace such values with
        // their textual description so one bad field cannot crash the app.
        let safeEntry = entry.mapValues { value -> Any in
            JSONSerialization.isValidJSONObject([value]) ? value : String(describing: value)
        }
        do {
            let jsonData = try JSONSerialization.data(withJSONObject: safeEntry, options: .prettyPrinted)
            print(String(decoding: jsonData, as: UTF8.self))
            // Write to file or send to external logging service
        } catch {
            print("Failed to serialize log entry: \(error)")
        }
    }

    // Convenience methods

    /// Logs `message` at debug level.
    func debug(_ message: String, data: [String: Any] = [:]) {
        log(.debug, message: message, data: data)
    }

    /// Logs `message` at info level.
    func info(_ message: String, data: [String: Any] = [:]) {
        log(.info, message: message, data: data)
    }

    /// Logs `message` at warning level.
    func warning(_ message: String, data: [String: Any] = [:]) {
        log(.warning, message: message, data: data)
    }

    /// Logs `message` at error level.
    func error(_ message: String, data: [String: Any] = [:]) {
        log(.error, message: message, data: data)
    }
}
Integration with External Monitoring Services
1. Custom Analytics Service Integration
Create a service to send analytics data to external platforms:
/// Sends metrics and alerts as JSON to an external HTTP endpoint.
class ExternalAnalyticsService {
    private let apiEndpoint: URL
    private let apiKey: String
    private let session: URLSession

    /// - Parameters:
    ///   - endpoint: Absolute URL string of the metrics endpoint. An
    ///     unparseable string is a programmer error and traps with a message.
    ///   - apiKey: Sent as a bearer token in the `Authorization` header.
    init(endpoint: String, apiKey: String) {
        guard let url = URL(string: endpoint) else {
            preconditionFailure("ExternalAnalyticsService: invalid endpoint URL: \(endpoint)")
        }
        self.apiEndpoint = url
        self.apiKey = apiKey
        self.session = URLSession.shared
    }

    /// POSTs `metrics` as a JSON body. Fire-and-forget: the outcome is only
    /// printed, never returned to the caller.
    /// - Parameter metrics: Must contain only JSON-encodable values.
    func sendMetrics(_ metrics: [String: Any]) {
        // JSONSerialization raises an uncatchable exception (not a Swift error)
        // for invalid objects, so validate before serialising.
        guard JSONSerialization.isValidJSONObject(metrics) else {
            print("Failed to serialize metrics: payload is not a valid JSON object")
            return
        }

        var request = URLRequest(url: apiEndpoint)
        request.httpMethod = "POST"
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        do {
            request.httpBody = try JSONSerialization.data(withJSONObject: metrics)
        } catch {
            print("Failed to serialize metrics: \(error)")
            return
        }

        session.dataTask(with: request) { _, response, error in
            if let error = error {
                print("Failed to send metrics: \(error)")
                return
            }
            guard let httpResponse = response as? HTTPURLResponse else {
                print("Failed to send metrics: no HTTP response")
                return
            }
            // Accept any 2xx status, and report rejections instead of dropping them.
            if (200..<300).contains(httpResponse.statusCode) {
                print("Metrics sent successfully")
            } else {
                print("Failed to send metrics: HTTP \(httpResponse.statusCode)")
            }
        }.resume()
    }

    /// Sends `alert` through the metrics endpoint with `"type": "alert"` added.
    func sendAlert(_ alert: [String: Any]) {
        var alertData = alert
        alertData["type"] = "alert"
        sendMetrics(alertData)
    }
}
2. Integration with System Monitoring
For iOS apps, integrate with system-level monitoring:
import UIKit
/// Reads process-level resource figures from the Mach `task_info` API.
class SystemMonitor {
    /// Resident memory of the current process in megabytes.
    /// - Returns: `0` if `task_info` fails.
    func getMemoryUsage() -> Double {
        guard let info = basicTaskInfo() else { return 0 }
        return Double(info.resident_size) / 1024.0 / 1024.0 // MB
    }

    /// CPU time (user + system) reported for the current task, in seconds.
    ///
    /// Despite the name, this is an accumulated time, not a percentage.
    /// NOTE(review): the Mach headers describe `user_time`/`system_time` in
    /// `mach_task_basic_info` as covering terminated threads; time for live
    /// threads may need `TASK_THREAD_TIMES_INFO` — confirm before relying on it.
    /// - Returns: `0` if `task_info` fails.
    func getCPUUsage() -> Double {
        guard let info = basicTaskInfo() else { return 0 }
        // A time_value_t is whole seconds plus a microsecond remainder; both
        // parts are needed for the real duration.
        let userSeconds = Double(info.user_time.seconds) + Double(info.user_time.microseconds) / 1_000_000.0
        let systemSeconds = Double(info.system_time.seconds) + Double(info.system_time.microseconds) / 1_000_000.0
        return userSeconds + systemSeconds
    }

    /// Fetches `MACH_TASK_BASIC_INFO` for the current task, or `nil` on failure.
    private func basicTaskInfo() -> mach_task_basic_info? {
        var info = mach_task_basic_info()
        // task_info expects the buffer size as a count of integer_t words.
        var count = mach_msg_type_number_t(MemoryLayout<mach_task_basic_info>.size / MemoryLayout<integer_t>.size)
        let wordCount = Int(count)
        let kerr: kern_return_t = withUnsafeMutablePointer(to: &info) { infoPointer in
            // Rebind the whole struct, not just its first word.
            infoPointer.withMemoryRebound(to: integer_t.self, capacity: wordCount) { words in
                task_info(mach_task_self_,
                          task_flavor_t(MACH_TASK_BASIC_INFO),
                          words,
                          &count)
            }
        }
        return kerr == KERN_SUCCESS ? info : nil
    }
}
Putting It All Together
Here's how to integrate all components into a complete monitoring solution:
/// Wires the analytics, error tracking, logging, health and system
/// monitoring components together behind a small scraping API.
class ScrapingManager {
    private let analytics: ScrapingAnalytics
    private let errorTracker: ErrorTracker
    private let logger: ScrapingLogger
    private let healthMonitor: HealthMonitor
    private let externalService: ExternalAnalyticsService
    private let systemMonitor: SystemMonitor
    // One instrumented session shared by all scrapes. A session created per
    // call would allocate a new URLSession each time and could be released
    // while its request is still in flight.
    private let session: AnalyticsURLSession

    init() {
        let analytics = ScrapingAnalytics()
        let errorTracker = ErrorTracker()
        self.analytics = analytics
        self.errorTracker = errorTracker
        self.logger = ScrapingLogger()
        self.healthMonitor = HealthMonitor(analytics: analytics, errorTracker: errorTracker)
        self.externalService = ExternalAnalyticsService(
            endpoint: "https://your-analytics-service.com/api/metrics",
            apiKey: "your-api-key"
        )
        self.systemMonitor = SystemMonitor()
        self.session = AnalyticsURLSession(analytics: analytics)
    }

    /// Downloads `url` and returns the response body.
    /// - Parameter url: The page to fetch.
    /// - Returns: The raw response data.
    /// - Throws: `ScrapingError.unknown` wrapping the transport error, or
    ///   `ScrapingError.invalidResponse` if the request finished with neither
    ///   data nor an error.
    func scrapeWebsite(_ url: URL) async throws -> Data {
        logger.info("Starting web scrape", data: ["url": url.absoluteString])
        let startTime = Date()
        let request = URLRequest(url: url)
        let errorTracker = self.errorTracker
        let logger = self.logger
        return try await withCheckedThrowingContinuation { continuation in
            session.dataTask(with: request) { data, _, error in
                let duration = Date().timeIntervalSince(startTime)
                // Every path below resumes the continuation exactly once.
                if let error = error {
                    let scrapingError = ScrapingError.unknown(error)
                    errorTracker.trackError(scrapingError, context: [
                        "url": url.absoluteString,
                        "duration": duration
                    ])
                    logger.error("Scraping failed", data: [
                        "url": url.absoluteString,
                        "error": error.localizedDescription
                    ])
                    continuation.resume(throwing: scrapingError)
                } else if let data = data {
                    logger.info("Scraping completed successfully", data: [
                        "url": url.absoluteString,
                        "data_size": data.count,
                        "duration": duration
                    ])
                    continuation.resume(returning: data)
                } else {
                    // Neither data nor error: without this branch the awaiting
                    // caller would be suspended forever.
                    let scrapingError = ScrapingError.invalidResponse
                    errorTracker.trackError(scrapingError, context: [
                        "url": url.absoluteString,
                        "duration": duration
                    ])
                    logger.error("Scraping failed", data: [
                        "url": url.absoluteString,
                        "error": "empty response"
                    ])
                    continuation.resume(throwing: scrapingError)
                }
            }.resume()
        }
    }

    /// Snapshot of the current scraping and system metrics.
    /// - Returns: A dictionary with `success_rate`, `average_response_time`,
    ///   `memory_usage`, `cpu_usage` and an ISO 8601 `timestamp`.
    func generateReport() -> [String: Any] {
        return [
            "success_rate": analytics.getSuccessRate(),
            "average_response_time": analytics.getAverageResponseTime(),
            "memory_usage": systemMonitor.getMemoryUsage(),
            "cpu_usage": systemMonitor.getCPUUsage(),
            "timestamp": ISO8601DateFormatter().string(from: Date())
        ]
    }
}
Best Practices and Considerations
When implementing analytics and monitoring for Swift web scraping applications:
- Performance Impact: Ensure monitoring doesn't significantly impact scraping performance
- Data Privacy: Be mindful of what data you collect and log, especially when scraping sensitive content
- Storage Management: Implement log rotation and data retention policies
- Real-time Monitoring: Consider implementing real-time dashboards for critical metrics
- Alerting: Set up intelligent alerting thresholds based on historical data
Just as monitoring network requests is valuable in Puppeteer, Swift applications benefit from comprehensive network monitoring; likewise, the error-handling principles used in Puppeteer can be adapted to Swift implementations.
Conclusion
Effective analytics and monitoring in Swift web scraping applications requires a multi-layered approach combining performance tracking, error monitoring, structured logging, and system health checks. By implementing these patterns, you can maintain reliable scraping operations, quickly identify and resolve issues, and optimize performance over time.
The key is to start with basic metrics collection and gradually expand your monitoring capabilities as your application grows in complexity and scale.