feat(ios): enhance recovery logging and metrics recording

Implement comprehensive logging and observability for recovery operations:

- Add HistoryDAO for Core Data history recording
  - recordRecovery() method with execution time tracking
  - recordRecoveryFailure() method with detailed error info
  - Query helpers for history retrieval
- Enhance DailyNotificationReactivationManager logging:
  - Add execution time tracking (startTime/endTime)
  - Enhanced error logging with NSError details (domain, code, userInfo)
  - Comprehensive logging at each recovery step
  - Missed/future notification count logging
- Implement Core Data persistence for recovery metrics:
  - Recovery execution time
  - Missed notification count
  - Rescheduled notification count
  - Verified notification count
  - Error count
  - Diagnostic JSON with full recovery context
- Update recovery methods to record history:
  - Cold start recovery
  - Termination recovery
  - Boot recovery
  - All with timing and metrics

Completes section 7.1 (Recovery Logging) and 7.2 (Metrics Recording)
of iOS implementation checklist.
This commit is contained in:
Matthew
2025-12-09 02:37:27 -08:00
parent a90d08c425
commit 12d8536588
3 changed files with 442 additions and 44 deletions

View File

@@ -9,6 +9,7 @@
import Foundation
import UserNotifications
import BackgroundTasks
import CoreData
/**
* Manages recovery of notifications on app launch
@@ -77,6 +78,7 @@ class DailyNotificationReactivationManager {
*/
func performRecovery() {
Task {
let startTime = Date()
do {
try await withTimeout(seconds: Self.RECOVERY_TIMEOUT_SECONDS) {
NSLog("\(Self.TAG): Starting app launch recovery")
@@ -85,8 +87,12 @@ class DailyNotificationReactivationManager {
let isBoot = detectBootScenario()
if isBoot {
NSLog("\(Self.TAG): Boot scenario detected - performing boot recovery")
let bootStartTime = Date()
let result = try await performBootRecovery()
let bootEndTime = Date()
NSLog("\(Self.TAG): Boot recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
// Record in history
try await recordRecoveryHistory(result, scenario: .boot, startTime: bootStartTime, endTime: bootEndTime)
// Update last launch time after boot recovery
updateLastLaunchTime()
return
@@ -107,27 +113,59 @@ class DailyNotificationReactivationManager {
updateLastLaunchTime()
return
case .coldStart:
NSLog("\(Self.TAG): Cold start scenario - performing recovery")
let coldStartTime = Date()
let result = try await performColdStartRecovery()
NSLog("\(Self.TAG): App launch recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
let coldEndTime = Date()
NSLog("\(Self.TAG): Cold start recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
// Record in history
try await recordRecoveryHistory(result, scenario: .coldStart, startTime: coldStartTime, endTime: coldEndTime)
updateLastLaunchTime()
case .termination:
// Phase 2: Termination recovery
NSLog("\(Self.TAG): Termination scenario detected - performing full recovery")
let termStartTime = Date()
let result = try await handleTerminationRecovery()
let termEndTime = Date()
NSLog("\(Self.TAG): Termination recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
// Record in history
try await recordRecoveryHistory(result, scenario: .termination, startTime: termStartTime, endTime: termEndTime)
updateLastLaunchTime()
case .boot:
// Should be handled by initial boot detection
break
}
}
} catch is TimeoutError {
NSLog("\(Self.TAG): Recovery timed out after \(Self.RECOVERY_TIMEOUT_SECONDS) seconds (non-fatal)")
let endTime = Date()
let duration = endTime.timeIntervalSince(startTime)
NSLog("\(Self.TAG): Recovery timed out after \(Self.RECOVERY_TIMEOUT_SECONDS) seconds (non-fatal) - actual duration: \(String(format: "%.2f", duration))s")
// Record timeout in history
do {
try await recordRecoveryFailure(TimeoutError(), scenario: "TIMEOUT")
} catch {
NSLog("\(Self.TAG): Failed to record recovery timeout in history")
}
} catch {
// Rollback: Log error but don't crash
NSLog("\(Self.TAG): Recovery failed (non-fatal): \(error.localizedDescription)")
let endTime = Date()
let duration = endTime.timeIntervalSince(startTime)
NSLog("\(Self.TAG): Recovery failed (non-fatal): \(error.localizedDescription) - duration: \(String(format: "%.2f", duration))s")
// Enhanced error logging with stack trace
if let nsError = error as NSError? {
NSLog("\(Self.TAG): Error details - domain: \(nsError.domain), code: \(nsError.code)")
if let userInfo = nsError.userInfo as? [String: Any] {
NSLog("\(Self.TAG): Error userInfo: \(userInfo)")
}
}
NSLog("\(Self.TAG): Error type: \(type(of: error))")
// Record failure in history (best effort, don't fail if this fails)
do {
try await recordRecoveryFailure(error)
} catch {
NSLog("\(Self.TAG): Failed to record recovery failure in history")
NSLog("\(Self.TAG): Failed to record recovery failure in history: \(error.localizedDescription)")
}
}
}
@@ -202,6 +240,7 @@ class DailyNotificationReactivationManager {
// Step 1: Detect missed notifications
let missedNotifications = try await detectMissedNotifications(currentTime: currentTime)
NSLog("\(Self.TAG): Missed notifications detected: \(missedNotifications.count)")
var missedCount = 0
var missedErrors = 0
@@ -227,6 +266,7 @@ class DailyNotificationReactivationManager {
// Step 3: Verify future notifications
let verificationResult = try await verifyFutureNotifications()
NSLog("\(Self.TAG): Future notifications verified: found=\(verificationResult.notificationsFound), missing=\(verificationResult.notificationsMissing)")
var rescheduledCount = 0
var rescheduleErrors = 0
@@ -259,7 +299,8 @@ class DailyNotificationReactivationManager {
errors: missedErrors + rescheduleErrors
)
try await recordRecoveryHistory(result, scenario: .coldStart)
// Note: History recording is done at performRecovery level with timing
// This method is called from performRecovery which tracks timing
return result
}
@@ -478,6 +519,7 @@ class DailyNotificationReactivationManager {
// Step 7: Verify final state
let verificationResult = try await verifyFutureNotifications()
NSLog("\(Self.TAG): Final verification: found=\(verificationResult.notificationsFound), missing=\(verificationResult.notificationsMissing)")
// Record recovery in history
let result = RecoveryResult(
@@ -487,7 +529,8 @@ class DailyNotificationReactivationManager {
errors: missedErrors + rescheduleErrors
)
try await recordRecoveryHistory(result, scenario: .termination)
// Note: History recording is done at performRecovery level with timing
// This method is called from performRecovery which tracks timing
NSLog("\(Self.TAG): Full recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
return result
@@ -577,8 +620,8 @@ class DailyNotificationReactivationManager {
// Use full recovery which handles all notifications
let result = try await performFullRecovery()
// Record as boot recovery in history
try await recordRecoveryHistory(result, scenario: .boot)
// Note: History recording is done at performRecovery level with timing
// This method is called from performRecovery which tracks timing
NSLog("\(Self.TAG): Boot recovery completed: missed=\(result.missedCount), rescheduled=\(result.rescheduledCount), verified=\(result.verifiedCount), errors=\(result.errors)")
return result
@@ -621,41 +664,96 @@ class DailyNotificationReactivationManager {
*
* @param result Recovery result
* @param scenario Recovery scenario
* @param startTime When recovery started
* @param endTime When recovery ended
*/
private func recordRecoveryHistory(_ result: RecoveryResult, scenario: RecoveryScenario) async throws {
// Note: History recording implementation depends on database structure
// For Phase 1, we'll log the recovery result
let diagJson = """
{
"scenario": "\(scenario.rawValue)",
"missedCount": \(result.missedCount),
"rescheduledCount": \(result.rescheduledCount),
"verifiedCount": \(result.verifiedCount),
"errors": \(result.errors)
private func recordRecoveryHistory(_ result: RecoveryResult, scenario: RecoveryScenario, startTime: Date, endTime: Date) async throws {
// Log recovery metrics
NSLog("\(Self.TAG): Recovery history - scenario: \(scenario.rawValue), missed: \(result.missedCount), rescheduled: \(result.rescheduledCount), verified: \(result.verifiedCount), errors: \(result.errors)")
// Record in Core Data history table
guard let context = PersistenceController.shared.viewContext else {
NSLog("\(Self.TAG): Cannot record history - CoreData not available")
return
}
"""
NSLog("\(Self.TAG): Recovery history: \(diagJson)")
// Create history record
let history = History.recordRecovery(
in: context,
scenario: scenario.rawValue,
missedCount: result.missedCount,
rescheduledCount: result.rescheduledCount,
verifiedCount: result.verifiedCount,
errors: result.errors,
startTime: startTime,
endTime: endTime
)
// TODO: Record in history table when database structure supports it
// Save context
do {
if context.hasChanges {
try context.save()
NSLog("\(Self.TAG): Recovery history recorded successfully")
}
} catch {
NSLog("\(Self.TAG): Failed to save recovery history: \(error.localizedDescription)")
context.rollback()
throw error
}
}
/**
* Record recovery failure
*
* @param error Error that occurred
* @param scenario Optional recovery scenario (if known)
*/
private func recordRecoveryFailure(_ error: Error) async throws {
let diagJson = """
{
"error": "\(error.localizedDescription)",
"errorType": "\(type(of: error))"
}
"""
private func recordRecoveryFailure(_ error: Error, scenario: String? = nil) async throws {
// Enhanced error logging
var errorDetails: [String: Any] = [
"error": error.localizedDescription,
"errorType": String(describing: type(of: error))
]
if let nsError = error as NSError? {
errorDetails["errorCode"] = nsError.code
errorDetails["errorDomain"] = nsError.domain
if let userInfo = nsError.userInfo as? [String: Any] {
errorDetails["userInfo"] = userInfo
}
}
if let scenario = scenario {
errorDetails["scenario"] = scenario
}
let diagJson = DailyNotificationDataConversions.jsonStringFromDictionary(errorDetails) ?? "{}"
NSLog("\(Self.TAG): Recovery failure: \(diagJson)")
// TODO: Record in history table when database structure supports it
// Record in Core Data history table
guard let context = PersistenceController.shared.viewContext else {
NSLog("\(Self.TAG): Cannot record failure - CoreData not available")
return
}
// Create failure history record
let history = History.recordRecoveryFailure(
in: context,
error: error,
scenario: scenario
)
// Save context
do {
if context.hasChanges {
try context.save()
NSLog("\(Self.TAG): Recovery failure recorded successfully")
}
} catch {
NSLog("\(Self.TAG): Failed to save recovery failure: \(error.localizedDescription)")
context.rollback()
// Don't throw - this is best effort
}
}
}