feat(android)!: implement Phase 2 Android core with WorkManager + AlarmManager + SQLite
- Add complete SQLite schema with Room database (content_cache, schedules, callbacks, history)
- Implement WorkManager FetchWorker with exponential backoff and network constraints
- Add AlarmManager NotifyReceiver with TTL-at-fire logic and notification delivery
- Create BootReceiver for automatic rescheduling after device reboot
- Update AndroidManifest.xml with necessary permissions and receivers
- Add Room, WorkManager, and Kotlin coroutines dependencies to build.gradle

feat(callback-registry)!: implement callback registry with circuit breaker
- Add CallbackRegistryImpl with HTTP, local, and queue callback support
- Implement circuit breaker pattern with exponential backoff retry logic
- Add CallbackEvent interface with structured event types
- Support for exactly-once delivery semantics with retry queue
- Include callback status monitoring and health checks

feat(observability)!: add comprehensive observability and health monitoring
- Implement ObservabilityManager with structured logging and event codes
- Add performance metrics tracking (fetch, notify, callback times)
- Create health status API with circuit breaker monitoring
- Include log compaction and metrics reset functionality
- Support for DNP-* event codes throughout the system

feat(web)!: enhance web implementation with new functionality
- Integrate callback registry and observability into web platform
- Add mock implementations for dual scheduling methods
- Implement performance tracking and structured logging
- Support for local callback registration and management
- Enhanced error handling and event logging

BREAKING CHANGE: New Android dependencies require Room, WorkManager, and Kotlin coroutines
This commit is contained in:
311
src/observability.ts
Normal file
311
src/observability.ts
Normal file
@@ -0,0 +1,311 @@
|
||||
/**
 * Observability & Health Monitoring Implementation
 * Provides structured logging, event codes, and health monitoring
 *
 * @author Matthew Raymer
 * @version 1.1.0
 */
|
||||
|
||||
/**
 * Point-in-time health snapshot, as returned by
 * `ObservabilityManager.getHealthStatus()`.
 */
export interface HealthStatus {
  /** Upcoming run times as epoch-millisecond timestamps. */
  nextRuns: number[];
  /** Event codes of the most recent fetch/notify log entries, newest first. */
  lastOutcomes: string[];
  /** Age of the cached content in milliseconds, or `null` when unknown. */
  cacheAgeMs: number | null;
  /** Whether the cached content is considered stale. */
  staleArmed: boolean;
  /** Number of queued/retrying items (derived from recent log entries). */
  queueDepth: number;
  /** Aggregate circuit-breaker state. */
  circuitBreakers: {
    /** Total number of circuit breakers. */
    total: number;
    /** Number of circuit breakers currently open. */
    open: number;
    /** Failure count across circuit breakers. */
    failures: number;
  };
  /** Aggregated performance figures. */
  performance: {
    /** Mean of the retained fetch durations (presumably ms), 0 when none recorded. */
    avgFetchTime: number;
    /** Mean of the retained notify durations (presumably ms), 0 when none recorded. */
    avgNotifyTime: number;
    /** Successes divided by total recorded operations, in the range 0..1. */
    successRate: number;
  };
}
|
||||
|
||||
/**
 * A single structured log entry stored by `ObservabilityManager`.
 */
export interface EventLog {
  /** Unique identifier of the entry. */
  id: string;
  /** Creation time as an epoch-millisecond timestamp. */
  timestamp: number;
  /** Severity of the entry. */
  level: 'INFO' | 'WARN' | 'ERROR';
  /** Machine-readable event code (e.g. one of the `DNP-*` codes). */
  eventCode: string;
  /** Human-readable description of the event. */
  message: string;
  /** Optional structured payload attached to the event. */
  data?: Record<string, unknown>;
  /** Optional duration of the operation, in milliseconds. */
  duration?: number;
}
|
||||
|
||||
/**
 * Raw performance samples and counters tracked by `ObservabilityManager`.
 */
export interface PerformanceMetrics {
  /** Recorded fetch durations, oldest first. */
  fetchTimes: number[];
  /** Recorded notify durations, oldest first. */
  notifyTimes: number[];
  /** Recorded callback durations, oldest first. */
  callbackTimes: number[];
  /** Number of operations recorded as successful since the last reset. */
  successCount: number;
  /** Number of operations recorded as failed since the last reset. */
  failureCount: number;
  /** Epoch-millisecond timestamp of the last metrics reset. */
  lastReset: number;
}
|
||||
|
||||
/**
 * Observability Manager
 *
 * Handles structured logging, health monitoring, and performance tracking.
 * All state is held in memory: a bounded, newest-first list of event logs and
 * a bounded set of duration samples plus success/failure counters.
 *
 * Several health inputs (next runs, cache age, circuit-breaker status) are
 * mock implementations that return fixed placeholder values.
 */
export class ObservabilityManager {
  /** Stored log entries, newest first. */
  private eventLogs: EventLog[] = [];
  /** Duration samples and success/failure counters. */
  private performanceMetrics: PerformanceMetrics = ObservabilityManager.createEmptyMetrics();
  /** Maximum number of log entries retained. */
  private maxLogs = 1000;
  /** Maximum number of duration samples retained per metric type. */
  private maxMetrics = 100;

  /**
   * Log a structured event with an event code.
   *
   * The event is stored at the front of the in-memory log (the oldest entries
   * are dropped beyond `maxLogs`) and echoed to the console using the method
   * matching `level`.
   *
   * @param level - Severity; selects `console.log`, `console.warn` or `console.error`.
   * @param eventCode - Machine-readable event code, printed in square brackets.
   * @param message - Human-readable message.
   * @param data - Optional structured payload; serialized as JSON for the console.
   * @param duration - Optional duration in milliseconds; `0` is a valid value.
   */
  logEvent(
    level: 'INFO' | 'WARN' | 'ERROR',
    eventCode: string,
    message: string,
    data?: Record<string, unknown>,
    duration?: number
  ): void {
    const event: EventLog = {
      id: this.generateEventId(),
      timestamp: Date.now(),
      level,
      eventCode,
      message,
      data,
      duration
    };

    this.eventLogs.unshift(event);

    // Keep only recent logs
    if (this.eventLogs.length > this.maxLogs) {
      this.eventLogs.length = this.maxLogs;
    }

    // Console output with structured format
    const logMessage = `[${eventCode}] ${message}`;
    const logData = data != null ? ` | Data: ${ObservabilityManager.safeStringify(data)}` : '';
    // Compare against null/undefined rather than truthiness so that a 0 ms
    // duration is still reported.
    const logDuration = duration != null ? ` | Duration: ${duration}ms` : '';
    const line = logMessage + logData + logDuration;

    switch (level) {
      case 'INFO':
        console.log(line);
        break;
      case 'WARN':
        console.warn(line);
        break;
      case 'ERROR':
        console.error(line);
        break;
    }
  }

  /**
   * Record a performance sample.
   *
   * @param type - Which duration series the sample belongs to.
   * @param duration - Duration of the operation.
   * @param success - Whether the operation succeeded; updates the success or
   *   failure counter accordingly.
   */
  recordMetric(type: 'fetch' | 'notify' | 'callback', duration: number, success: boolean): void {
    switch (type) {
      case 'fetch':
        this.performanceMetrics.fetchTimes.push(duration);
        break;
      case 'notify':
        this.performanceMetrics.notifyTimes.push(duration);
        break;
      case 'callback':
        this.performanceMetrics.callbackTimes.push(duration);
        break;
    }

    if (success) {
      this.performanceMetrics.successCount++;
    } else {
      this.performanceMetrics.failureCount++;
    }

    // Keep only recent metrics
    this.trimMetrics();
  }

  /**
   * Build a health snapshot from the stored logs and metrics.
   *
   * `lastOutcomes` and `queueDepth` are derived from log entries of the last
   * 24 hours; `nextRuns`, `cacheAgeMs` and `circuitBreakers` come from mock
   * helpers.
   *
   * @returns The current health status.
   */
  async getHealthStatus(): Promise<HealthStatus> {
    const dayMs = 24 * 60 * 60 * 1000;
    const staleThresholdMs = 60 * 60 * 1000; // 1 hour

    const now = Date.now();
    const recentLogs = this.eventLogs.filter(log => now - log.timestamp < dayMs);

    // Calculate next runs (mock implementation)
    const nextRuns = this.calculateNextRuns();

    // Up to ten most recent fetch/notify event codes (logs are newest first)
    const lastOutcomes = recentLogs
      .filter(log => log.eventCode.startsWith('DNP-FETCH-') || log.eventCode.startsWith('DNP-NOTIFY-'))
      .slice(0, 10)
      .map(log => log.eventCode);

    // Calculate cache age (mock implementation)
    const cacheAgeMs = this.calculateCacheAge();

    // Stale when the age is unknown or older than the threshold. An explicit
    // null check is required: an age of 0 ms means "fresh", not "unknown".
    const staleArmed = cacheAgeMs !== null ? cacheAgeMs > staleThresholdMs : true;

    // Queue depth is approximated by counting recent queue/retry log entries
    const queueDepth = recentLogs.filter(log =>
      log.eventCode.includes('QUEUE') || log.eventCode.includes('RETRY')
    ).length;

    // Circuit breaker status
    const circuitBreakers = this.getCircuitBreakerStatus();

    // Performance metrics
    const performance = this.calculatePerformanceMetrics();

    return {
      nextRuns,
      lastOutcomes,
      cacheAgeMs,
      staleArmed,
      queueDepth,
      circuitBreakers,
      performance
    };
  }

  /**
   * Get the most recent event logs, newest first.
   *
   * @param limit - Maximum number of entries to return; negative values are
   *   treated as 0.
   * @returns A new array containing at most `limit` entries.
   */
  getRecentLogs(limit: number = 50): EventLog[] {
    return this.eventLogs.slice(0, Math.max(0, limit));
  }

  /**
   * Get a snapshot of the performance metrics.
   *
   * @returns A copy whose arrays are independent of the manager's internal
   *   state, so mutating the result does not affect recorded metrics.
   */
  getPerformanceMetrics(): PerformanceMetrics {
    const metrics = this.performanceMetrics;
    return {
      ...metrics,
      fetchTimes: [...metrics.fetchTimes],
      notifyTimes: [...metrics.notifyTimes],
      callbackTimes: [...metrics.callbackTimes]
    };
  }

  /**
   * Reset all performance metrics and log a `DNP-METRICS-RESET` event.
   */
  resetMetrics(): void {
    this.performanceMetrics = ObservabilityManager.createEmptyMetrics();

    this.logEvent('INFO', 'DNP-METRICS-RESET', 'Performance metrics reset');
  }

  /**
   * Compact old logs (called by cleanup job).
   *
   * Removes entries older than `olderThanMs` and, if anything was removed,
   * logs a `DNP-LOGS-COMPACTED` event.
   *
   * @param olderThanMs - Maximum age in milliseconds to keep (default 30 days).
   * @returns The number of entries removed.
   */
  compactLogs(olderThanMs: number = 30 * 24 * 60 * 60 * 1000): number {
    const cutoff = Date.now() - olderThanMs;
    const initialCount = this.eventLogs.length;

    this.eventLogs = this.eventLogs.filter(log => log.timestamp >= cutoff);

    const removedCount = initialCount - this.eventLogs.length;
    if (removedCount > 0) {
      this.logEvent('INFO', 'DNP-LOGS-COMPACTED', `Removed ${removedCount} old logs`);
    }

    return removedCount;
  }

  // Private helper methods

  /** Create a zeroed metrics object stamped with the current time. */
  private static createEmptyMetrics(): PerformanceMetrics {
    return {
      fetchTimes: [],
      notifyTimes: [],
      callbackTimes: [],
      successCount: 0,
      failureCount: 0,
      lastReset: Date.now()
    };
  }

  /**
   * Serialize a value for console output without ever throwing; logging must
   * not fail because a payload is cyclic or contains a BigInt.
   */
  private static safeStringify(value: unknown): string {
    try {
      return JSON.stringify(value);
    } catch {
      return '[unserializable]';
    }
  }

  /** Arithmetic mean of `values`, or 0 for an empty list. */
  private static average(values: readonly number[]): number {
    return values.length > 0
      ? values.reduce((sum, value) => sum + value, 0) / values.length
      : 0;
  }

  /** Build an id from the current time plus up to nine random base-36 characters. */
  private generateEventId(): string {
    return `evt_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** Drop the oldest samples so each series holds at most `maxMetrics` entries. */
  private trimMetrics(): void {
    const metrics = this.performanceMetrics;
    for (const key of ['fetchTimes', 'notifyTimes', 'callbackTimes'] as const) {
      if (metrics[key].length > this.maxMetrics) {
        metrics[key] = metrics[key].slice(-this.maxMetrics);
      }
    }
  }

  /** Mock implementation - would calculate from actual schedules. */
  private calculateNextRuns(): number[] {
    const now = Date.now();
    return [
      now + (60 * 60 * 1000), // 1 hour from now
      now + (24 * 60 * 60 * 1000) // 24 hours from now
    ];
  }

  /** Mock implementation - would get from actual cache. */
  private calculateCacheAge(): number | null {
    return 1800000; // 30 minutes
  }

  /** Mock implementation - would get from actual circuit breakers. */
  private getCircuitBreakerStatus(): { total: number; open: number; failures: number } {
    return {
      total: 3,
      open: 1,
      failures: 5
    };
  }

  /**
   * Aggregate the retained samples into averages and a success rate.
   * Each figure is 0 when there is no data to compute it from.
   */
  private calculatePerformanceMetrics(): {
    avgFetchTime: number;
    avgNotifyTime: number;
    successRate: number;
  } {
    const { fetchTimes, notifyTimes, successCount, failureCount } = this.performanceMetrics;
    const totalOperations = successCount + failureCount;

    return {
      avgFetchTime: ObservabilityManager.average(fetchTimes),
      avgNotifyTime: ObservabilityManager.average(notifyTimes),
      successRate: totalOperations > 0 ? successCount / totalOperations : 0
    };
  }
}
|
||||
|
||||
/**
 * Singleton instance.
 *
 * Module-level `ObservabilityManager` created when this module is first
 * evaluated; importers of this binding share its in-memory logs and metrics.
 */
export const observability = new ObservabilityManager();
|
||||
|
||||
/**
 * Event code constants.
 *
 * Every code carries the `DNP-` prefix. Declared `as const` so each value
 * keeps its string-literal type.
 */
export const EVENT_CODES = {
  // Fetch lifecycle
  FETCH_START: 'DNP-FETCH-START',
  FETCH_SUCCESS: 'DNP-FETCH-SUCCESS',
  FETCH_FAILURE: 'DNP-FETCH-FAILURE',
  FETCH_RETRY: 'DNP-FETCH-RETRY',
  // Notification lifecycle
  NOTIFY_START: 'DNP-NOTIFY-START',
  NOTIFY_SUCCESS: 'DNP-NOTIFY-SUCCESS',
  NOTIFY_FAILURE: 'DNP-NOTIFY-FAILURE',
  NOTIFY_SKIPPED_TTL: 'DNP-NOTIFY-SKIPPED-TTL',
  // Callback delivery and circuit breaker
  CALLBACK_START: 'DNP-CB-START',
  CALLBACK_SUCCESS: 'DNP-CB-SUCCESS',
  CALLBACK_FAILURE: 'DNP-CB-FAILURE',
  CALLBACK_RETRY: 'DNP-CB-RETRY',
  CALLBACK_CIRCUIT_OPEN: 'DNP-CB-CIRCUIT-OPEN',
  CALLBACK_CIRCUIT_CLOSE: 'DNP-CB-CIRCUIT-CLOSE',
  // Scheduling
  BOOT_RECOVERY: 'DNP-BOOT-RECOVERY',
  SCHEDULE_UPDATE: 'DNP-SCHEDULE-UPDATE',
  // Cache
  CACHE_HIT: 'DNP-CACHE-HIT',
  CACHE_MISS: 'DNP-CACHE-MISS',
  TTL_EXPIRED: 'DNP-TTL-EXPIRED',
  // Housekeeping
  METRICS_RESET: 'DNP-METRICS-RESET',
  LOGS_COMPACTED: 'DNP-LOGS-COMPACTED'
} as const;
|
||||
Reference in New Issue
Block a user