/**
 * Observability & Health Monitoring Implementation
 * Provides structured logging, event codes, and health monitoring
 *
 * @author Matthew Raymer
 * @version 1.1.0
 */

/** One hour, in milliseconds. */
const ONE_HOUR_MS = 60 * 60 * 1000;

/** One day, in milliseconds. */
const ONE_DAY_MS = 24 * ONE_HOUR_MS;

/** A cache older than this is reported as "stale armed" by the health check. */
const STALE_CACHE_THRESHOLD_MS = ONE_HOUR_MS;

/** Default retention window used by `compactLogs` (30 days). */
const DEFAULT_LOG_RETENTION_MS = 30 * ONE_DAY_MS;

/** Maximum number of outcome codes reported in `HealthStatus.lastOutcomes`. */
const MAX_LAST_OUTCOMES = 10;

/** Snapshot returned by `ObservabilityManager.getHealthStatus`. */
export interface HealthStatus {
  /** Epoch-millisecond timestamps of upcoming runs. */
  nextRuns: number[];
  /** Event codes of the most recent fetch/notify events, newest first. */
  lastOutcomes: string[];
  /** Cache age in milliseconds, or `null` when no age is available. */
  cacheAgeMs: number | null;
  /** `true` when the cache age is unknown or exceeds one hour. */
  staleArmed: boolean;
  /** Count of recent events whose code mentions QUEUE or RETRY. */
  queueDepth: number;
  circuitBreakers: {
    total: number;
    open: number;
    failures: number;
  };
  performance: {
    avgFetchTime: number;
    avgNotifyTime: number;
    /** Fraction in [0, 1]; `0` when nothing has been recorded. */
    successRate: number;
  };
}

/** A single structured log entry. */
export interface EventLog {
  id: string;
  /** Creation time, epoch milliseconds. */
  timestamp: number;
  level: 'INFO' | 'WARN' | 'ERROR';
  eventCode: string;
  message: string;
  data?: Record<string, unknown>;
  /** Optional duration in milliseconds. */
  duration?: number;
}

/** Raw performance samples and counters. */
export interface PerformanceMetrics {
  fetchTimes: number[];
  notifyTimes: number[];
  callbackTimes: number[];
  successCount: number;
  failureCount: number;
  /** Epoch milliseconds of the last reset (or of construction). */
  lastReset: number;
}

// Event code constants.
// Declared ahead of the class so its methods can reference them.
export const EVENT_CODES = {
  FETCH_START: 'DNP-FETCH-START',
  FETCH_SUCCESS: 'DNP-FETCH-SUCCESS',
  FETCH_FAILURE: 'DNP-FETCH-FAILURE',
  FETCH_RETRY: 'DNP-FETCH-RETRY',
  NOTIFY_START: 'DNP-NOTIFY-START',
  NOTIFY_SUCCESS: 'DNP-NOTIFY-SUCCESS',
  NOTIFY_FAILURE: 'DNP-NOTIFY-FAILURE',
  NOTIFY_SKIPPED_TTL: 'DNP-NOTIFY-SKIPPED-TTL',
  CALLBACK_START: 'DNP-CB-START',
  CALLBACK_SUCCESS: 'DNP-CB-SUCCESS',
  CALLBACK_FAILURE: 'DNP-CB-FAILURE',
  CALLBACK_RETRY: 'DNP-CB-RETRY',
  CALLBACK_CIRCUIT_OPEN: 'DNP-CB-CIRCUIT-OPEN',
  CALLBACK_CIRCUIT_CLOSE: 'DNP-CB-CIRCUIT-CLOSE',
  BOOT_RECOVERY: 'DNP-BOOT-RECOVERY',
  SCHEDULE_UPDATE: 'DNP-SCHEDULE-UPDATE',
  CACHE_HIT: 'DNP-CACHE-HIT',
  CACHE_MISS: 'DNP-CACHE-MISS',
  TTL_EXPIRED: 'DNP-TTL-EXPIRED',
  METRICS_RESET: 'DNP-METRICS-RESET',
  LOGS_COMPACTED: 'DNP-LOGS-COMPACTED'
} as const;

/**
 * Observability Manager
 * Handles structured logging, health monitoring, and performance tracking
 */
export class ObservabilityManager {
  /** In-memory event log, newest entry first. */
  private eventLogs: EventLog[] = [];

  private performanceMetrics: PerformanceMetrics = {
    fetchTimes: [],
    notifyTimes: [],
    callbackTimes: [],
    successCount: 0,
    failureCount: 0,
    lastReset: Date.now()
  };

  /** Maximum number of events retained in memory. */
  private readonly maxLogs = 1000;

  /** Maximum number of samples retained per metric series. */
  private readonly maxMetrics = 100;

  /**
   * Log structured event with event code.
   *
   * The event is stored in memory only; console output is disabled, so no
   * message string is formatted and `data` is never serialized here.
   *
   * @param level - Severity of the event.
   * @param eventCode - Machine-readable code, e.g. one of `EVENT_CODES`.
   * @param message - Human-readable description.
   * @param data - Optional structured payload, stored by reference.
   * @param duration - Optional duration in milliseconds.
   */
  logEvent(
    level: 'INFO' | 'WARN' | 'ERROR',
    eventCode: string,
    message: string,
    data?: Record<string, unknown>,
    duration?: number
  ): void {
    const event: EventLog = {
      id: this.generateEventId(),
      timestamp: Date.now(),
      level,
      eventCode,
      message,
      data,
      duration
    };

    // Newest first, so readers can take the head of the array.
    this.eventLogs.unshift(event);

    // Keep only recent logs
    if (this.eventLogs.length > this.maxLogs) {
      this.eventLogs = this.eventLogs.slice(0, this.maxLogs);
    }
  }

  /**
   * Record performance metrics.
   *
   * @param type - Which series the sample belongs to.
   * @param duration - Sample value in milliseconds.
   * @param success - Whether the operation succeeded; feeds the success rate.
   */
  recordMetric(type: 'fetch' | 'notify' | 'callback', duration: number, success: boolean): void {
    switch (type) {
      case 'fetch':
        this.performanceMetrics.fetchTimes.push(duration);
        break;
      case 'notify':
        this.performanceMetrics.notifyTimes.push(duration);
        break;
      case 'callback':
        this.performanceMetrics.callbackTimes.push(duration);
        break;
    }

    if (success) {
      this.performanceMetrics.successCount++;
    } else {
      this.performanceMetrics.failureCount++;
    }

    // Keep only recent metrics
    this.trimMetrics();
  }

  /**
   * Get health status.
   *
   * Outcomes and queue depth are derived from events logged in the last
   * 24 hours. Next runs, cache age and circuit-breaker status currently come
   * from mock helpers.
   *
   * @returns A snapshot of the current health indicators.
   */
  async getHealthStatus(): Promise<HealthStatus> {
    const now = Date.now();
    const recentLogs = this.eventLogs.filter(log => now - log.timestamp < ONE_DAY_MS);

    // Calculate next runs (mock implementation)
    const nextRuns = this.calculateNextRuns();

    // Get last outcomes from recent logs
    const lastOutcomes = recentLogs
      .filter(log => log.eventCode.startsWith('DNP-FETCH-') || log.eventCode.startsWith('DNP-NOTIFY-'))
      .slice(0, MAX_LAST_OUTCOMES)
      .map(log => log.eventCode);

    // Calculate cache age (mock implementation)
    const cacheAgeMs = this.calculateCacheAge();

    // Explicit null check: an age of 0 ms is a fresh cache, not a missing one.
    const staleArmed = cacheAgeMs === null || cacheAgeMs > STALE_CACHE_THRESHOLD_MS;

    // Calculate queue depth
    const queueDepth = recentLogs.filter(
      log => log.eventCode.includes('QUEUE') || log.eventCode.includes('RETRY')
    ).length;

    // Circuit breaker status
    const circuitBreakers = this.getCircuitBreakerStatus();

    // Performance metrics
    const performance = this.calculatePerformanceMetrics();

    return {
      nextRuns,
      lastOutcomes,
      cacheAgeMs,
      staleArmed,
      queueDepth,
      circuitBreakers,
      performance
    };
  }

  /**
   * Get recent event logs.
   *
   * @param limit - Maximum number of entries to return; negative values are
   *   treated as 0.
   * @returns A new array holding up to `limit` events, newest first.
   */
  getRecentLogs(limit = 50): EventLog[] {
    // Clamp so a negative limit cannot turn into "all but the last |limit|".
    return this.eventLogs.slice(0, Math.max(0, limit));
  }

  /**
   * Get performance metrics.
   *
   * @returns A copy of the metrics; the sample arrays are copied too, so
   *   mutating the result does not affect the manager.
   */
  getPerformanceMetrics(): PerformanceMetrics {
    const { fetchTimes, notifyTimes, callbackTimes } = this.performanceMetrics;
    return {
      ...this.performanceMetrics,
      fetchTimes: [...fetchTimes],
      notifyTimes: [...notifyTimes],
      callbackTimes: [...callbackTimes]
    };
  }

  /**
   * Reset performance metrics and log a `DNP-METRICS-RESET` event.
   */
  resetMetrics(): void {
    this.performanceMetrics = {
      fetchTimes: [],
      notifyTimes: [],
      callbackTimes: [],
      successCount: 0,
      failureCount: 0,
      lastReset: Date.now()
    };

    this.logEvent('INFO', EVENT_CODES.METRICS_RESET, 'Performance metrics reset');
  }

  /**
   * Compact old logs (called by cleanup job).
   *
   * @param olderThanMs - Events older than this many milliseconds are
   *   dropped. Defaults to 30 days.
   * @returns The number of events removed. When that number is greater than
   *   zero, a `DNP-LOGS-COMPACTED` event is logged afterwards.
   */
  compactLogs(olderThanMs: number = DEFAULT_LOG_RETENTION_MS): number {
    const cutoff = Date.now() - olderThanMs;
    const initialCount = this.eventLogs.length;

    this.eventLogs = this.eventLogs.filter(log => log.timestamp >= cutoff);

    const removedCount = initialCount - this.eventLogs.length;
    if (removedCount > 0) {
      this.logEvent('INFO', EVENT_CODES.LOGS_COMPACTED, `Removed ${removedCount} old logs`);
    }

    return removedCount;
  }

  // Private helper methods

  /** Build an id of the form `evt_<epoch ms>_<up to 9 base-36 characters>`. */
  private generateEventId(): string {
    // slice(2, 11) skips the leading "0." and replaces the deprecated substr(2, 9).
    return `evt_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** Cap every sample series at the newest `maxMetrics` entries. */
  private trimMetrics(): void {
    const metrics = this.performanceMetrics;
    const seriesKeys = ['fetchTimes', 'notifyTimes', 'callbackTimes'] as const;

    for (const key of seriesKeys) {
      if (metrics[key].length > this.maxMetrics) {
        metrics[key] = metrics[key].slice(-this.maxMetrics);
      }
    }
  }

  private calculateNextRuns(): number[] {
    // Mock implementation - would calculate from actual schedules
    const now = Date.now();
    return [
      now + ONE_HOUR_MS, // 1 hour from now
      now + ONE_DAY_MS // 24 hours from now
    ];
  }

  private calculateCacheAge(): number | null {
    // Mock implementation - would get from actual cache
    return 1800000; // 30 minutes
  }

  private getCircuitBreakerStatus(): { total: number; open: number; failures: number } {
    // Mock implementation - would get from actual circuit breakers
    return {
      total: 3,
      open: 1,
      failures: 5
    };
  }

  /** Average the fetch/notify samples and compute the overall success rate. */
  private calculatePerformanceMetrics(): {
    avgFetchTime: number;
    avgNotifyTime: number;
    successRate: number;
  } {
    // An empty series averages to 0 rather than NaN.
    const average = (samples: readonly number[]): number =>
      samples.length > 0 ? samples.reduce((sum, value) => sum + value, 0) / samples.length : 0;

    const { fetchTimes, notifyTimes, successCount, failureCount } = this.performanceMetrics;
    const totalOperations = successCount + failureCount;

    return {
      avgFetchTime: average(fetchTimes),
      avgNotifyTime: average(notifyTimes),
      successRate: totalOperations > 0 ? successCount / totalOperations : 0
    };
  }
}

// Singleton instance
export const observability = new ObservabilityManager();