You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

311 lines
8.4 KiB

/**
* Observability & Health Monitoring Implementation
* Provides structured logging, event codes, and health monitoring
*
* @author Matthew Raymer
* @version 1.1.0
*/
/**
 * Health snapshot, in the shape produced by
 * `ObservabilityManager.getHealthStatus()`.
 */
export interface HealthStatus {
  /** Upcoming run times as epoch-millisecond timestamps. */
  nextRuns: number[];

  /** Event codes of the most recent fetch/notify outcomes, newest first. */
  lastOutcomes: string[];

  /** Cache age in milliseconds; `null` presumably means no age is known. */
  cacheAgeMs: number | null;

  /** Staleness flag; the manager derives it from `cacheAgeMs` using a one-hour threshold. */
  staleArmed: boolean;

  /** Count of recent events whose code mentions `QUEUE` or `RETRY`. */
  queueDepth: number;

  /** Circuit-breaker counters (currently filled with mock values by the manager). */
  circuitBreakers: {
    total: number;
    open: number;
    failures: number;
  };

  /** Aggregates computed from the recorded timing samples and outcome counters. */
  performance: {
    /** Arithmetic mean of retained fetch durations, or 0 when none are recorded. */
    avgFetchTime: number;
    /** Arithmetic mean of retained notify durations, or 0 when none are recorded. */
    avgNotifyTime: number;
    /** Successes divided by all recorded operations, or 0 when none are recorded. */
    successRate: number;
  };
}
/**
 * One structured log entry as stored by `ObservabilityManager.logEvent()`.
 */
export interface EventLog {
  /** Identifier generated by the manager for this entry. */
  id: string;

  /** Creation time in epoch milliseconds (`Date.now()`). */
  timestamp: number;

  /** Severity; also selects the console method used for output. */
  level: 'INFO' | 'WARN' | 'ERROR';

  /** Event code, e.g. one of the `DNP-*` values. */
  eventCode: string;

  /** Human-readable description of the event. */
  message: string;

  /** Optional structured payload attached to the event. */
  data?: Record<string, unknown>;

  /** Optional duration; the console output renders it with an `ms` suffix. */
  duration?: number;
}
/**
 * Raw performance samples and outcome counters kept by `ObservabilityManager`.
 */
export interface PerformanceMetrics {
  /** Recorded durations of fetch operations (unit presumably milliseconds). */
  fetchTimes: number[];

  /** Recorded durations of notify operations (unit presumably milliseconds). */
  notifyTimes: number[];

  /** Recorded durations of callback operations (unit presumably milliseconds). */
  callbackTimes: number[];

  /** Number of operations recorded as successful. */
  successCount: number;

  /** Number of operations recorded as failed. */
  failureCount: number;

  /** Epoch-millisecond time at which the metrics were created or last reset. */
  lastReset: number;
}
/**
 * Observability Manager
 *
 * Keeps an in-memory, newest-first buffer of structured events, mirrors every
 * event to the console, and tracks bounded windows of timing samples from
 * which a health snapshot is derived.
 *
 * Next-run times, cache age and circuit-breaker figures are still mock values
 * (see the private helpers) and are not read from real subsystems.
 */
export class ObservabilityManager {
  /** Window used by the health check when selecting "recent" events. */
  private static readonly DAY_MS = 24 * 60 * 60 * 1000;
  /** Cache age above which the health check reports `staleArmed`. */
  private static readonly STALE_THRESHOLD_MS = 60 * 60 * 1000;

  /** Newest-first event buffer, capped at `maxLogs` entries. */
  private eventLogs: EventLog[] = [];
  private performanceMetrics: PerformanceMetrics = ObservabilityManager.createEmptyMetrics();
  private readonly maxLogs = 1000;
  private readonly maxMetrics = 100;

  /**
   * Store a structured event and echo it to the console.
   *
   * @param level - Severity; selects `console.log`, `console.warn` or `console.error`.
   * @param eventCode - Code printed in square brackets in front of the message.
   * @param message - Human-readable description.
   * @param data - Optional payload, appended to the console line as JSON.
   * @param duration - Optional duration, appended with an `ms` suffix (0 is printed too).
   */
  logEvent(
    level: 'INFO' | 'WARN' | 'ERROR',
    eventCode: string,
    message: string,
    data?: Record<string, unknown>,
    duration?: number
  ): void {
    const event: EventLog = {
      id: this.generateEventId(),
      timestamp: Date.now(),
      level,
      eventCode,
      message,
      data,
      duration
    };

    // Newest entries live at the front; drop the oldest once over the cap.
    this.eventLogs.unshift(event);
    if (this.eventLogs.length > this.maxLogs) {
      this.eventLogs = this.eventLogs.slice(0, this.maxLogs);
    }

    const dataPart = data ? ` | Data: ${this.safeStringify(data)}` : '';
    // Compare against null/undefined rather than truthiness so 0 ms is still reported.
    const durationPart = duration != null ? ` | Duration: ${duration}ms` : '';
    const line = `[${eventCode}] ${message}${dataPart}${durationPart}`;

    switch (level) {
      case 'INFO':
        console.log(line);
        break;
      case 'WARN':
        console.warn(line);
        break;
      case 'ERROR':
        console.error(line);
        break;
    }
  }

  /**
   * Record one timing sample and count its outcome.
   *
   * @param type - Which sample window the duration is appended to.
   * @param duration - Duration of the operation.
   * @param success - Increments the success counter when true, the failure counter otherwise.
   */
  recordMetric(type: 'fetch' | 'notify' | 'callback', duration: number, success: boolean): void {
    switch (type) {
      case 'fetch':
        this.performanceMetrics.fetchTimes.push(duration);
        break;
      case 'notify':
        this.performanceMetrics.notifyTimes.push(duration);
        break;
      case 'callback':
        this.performanceMetrics.callbackTimes.push(duration);
        break;
    }

    if (success) {
      this.performanceMetrics.successCount++;
    } else {
      this.performanceMetrics.failureCount++;
    }

    // Keep only the most recent samples per window.
    this.trimMetrics();
  }

  /**
   * Build a health snapshot from the events of the last 24 hours, the
   * recorded metrics and the (currently mocked) schedule, cache and
   * circuit-breaker helpers.
   *
   * @returns The assembled health status.
   */
  async getHealthStatus(): Promise<HealthStatus> {
    const now = Date.now();
    const recentLogs = this.eventLogs.filter(
      log => now - log.timestamp < ObservabilityManager.DAY_MS
    );

    const nextRuns = this.calculateNextRuns();

    // Up to ten most recent fetch/notify event codes (buffer is newest-first).
    const lastOutcomes = recentLogs
      .filter(log => log.eventCode.startsWith('DNP-FETCH-') || log.eventCode.startsWith('DNP-NOTIFY-'))
      .slice(0, 10)
      .map(log => log.eventCode);

    const cacheAgeMs = this.calculateCacheAge();

    // Unknown age counts as stale; an age of 0 is a fresh cache, not an unknown one.
    const staleArmed =
      cacheAgeMs === null || cacheAgeMs > ObservabilityManager.STALE_THRESHOLD_MS;

    const queueDepth = recentLogs.filter(
      log => log.eventCode.includes('QUEUE') || log.eventCode.includes('RETRY')
    ).length;

    return {
      nextRuns,
      lastOutcomes,
      cacheAgeMs,
      staleArmed,
      queueDepth,
      circuitBreakers: this.getCircuitBreakerStatus(),
      performance: this.calculatePerformanceMetrics()
    };
  }

  /**
   * Get the most recent events, newest first.
   *
   * @param limit - Maximum number of entries; negative values yield an empty list.
   * @returns A new array holding at most `limit` entries.
   */
  getRecentLogs(limit: number = 50): EventLog[] {
    // Clamp so a negative limit cannot turn into "all but the last N".
    return this.eventLogs.slice(0, Math.max(0, limit));
  }

  /**
   * Get a snapshot of the raw performance metrics.
   *
   * @returns A copy whose sample arrays are detached from internal state.
   */
  getPerformanceMetrics(): PerformanceMetrics {
    const current = this.performanceMetrics;
    return {
      ...current,
      fetchTimes: [...current.fetchTimes],
      notifyTimes: [...current.notifyTimes],
      callbackTimes: [...current.callbackTimes]
    };
  }

  /**
   * Clear all samples and counters, stamp the reset time and log
   * a `DNP-METRICS-RESET` event.
   */
  resetMetrics(): void {
    this.performanceMetrics = ObservabilityManager.createEmptyMetrics();
    this.logEvent('INFO', 'DNP-METRICS-RESET', 'Performance metrics reset');
  }

  /**
   * Drop events older than the given age (called by cleanup job).
   *
   * When anything was removed, a `DNP-LOGS-COMPACTED` event is logged afterwards.
   *
   * @param olderThanMs - Maximum age to keep, in milliseconds (default 30 days).
   * @returns Number of removed entries.
   */
  compactLogs(olderThanMs: number = 30 * 24 * 60 * 60 * 1000): number {
    const cutoff = Date.now() - olderThanMs;
    const initialCount = this.eventLogs.length;
    this.eventLogs = this.eventLogs.filter(log => log.timestamp >= cutoff);

    const removedCount = initialCount - this.eventLogs.length;
    if (removedCount > 0) {
      this.logEvent('INFO', 'DNP-LOGS-COMPACTED', `Removed ${removedCount} old logs`);
    }
    return removedCount;
  }

  // Private helper methods

  /** Fresh, zeroed metrics object stamped with the current time. */
  private static createEmptyMetrics(): PerformanceMetrics {
    return {
      fetchTimes: [],
      notifyTimes: [],
      callbackTimes: [],
      successCount: 0,
      failureCount: 0,
      lastReset: Date.now()
    };
  }

  /** Serialise a payload for console output without letting a bad payload throw. */
  private safeStringify(data: Record<string, unknown>): string {
    try {
      return JSON.stringify(data);
    } catch (error) {
      // JSON.stringify throws on circular structures and BigInt values.
      const reason = error instanceof Error ? error.message : String(error);
      return `[unserializable: ${reason}]`;
    }
  }

  /** Event id made of the current time plus up to nine random base-36 characters. */
  private generateEventId(): string {
    // slice(2, 11) takes the same nine characters the deprecated substr(2, 9) did.
    return `evt_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** Cut every sample window down to its last `maxMetrics` entries. */
  private trimMetrics(): void {
    const metrics = this.performanceMetrics;
    const windows = ['fetchTimes', 'notifyTimes', 'callbackTimes'] as const;
    for (const key of windows) {
      if (metrics[key].length > this.maxMetrics) {
        metrics[key] = metrics[key].slice(-this.maxMetrics);
      }
    }
  }

  private calculateNextRuns(): number[] {
    // Mock implementation - would calculate from actual schedules
    const now = Date.now();
    return [
      now + (60 * 60 * 1000), // 1 hour from now
      now + (24 * 60 * 60 * 1000) // 24 hours from now
    ];
  }

  private calculateCacheAge(): number | null {
    // Mock implementation - would get from actual cache
    return 1800000; // 30 minutes
  }

  private getCircuitBreakerStatus(): { total: number; open: number; failures: number } {
    // Mock implementation - would get from actual circuit breakers
    return {
      total: 3,
      open: 1,
      failures: 5
    };
  }

  /** Averages of the retained fetch/notify samples plus the overall success ratio. */
  private calculatePerformanceMetrics(): {
    avgFetchTime: number;
    avgNotifyTime: number;
    successRate: number;
  } {
    const { fetchTimes, notifyTimes, successCount, failureCount } = this.performanceMetrics;
    const totalOperations = successCount + failureCount;

    // Mean of a sample list; an empty list averages to 0 instead of NaN.
    const average = (samples: number[]): number =>
      samples.length > 0 ? samples.reduce((sum, value) => sum + value, 0) / samples.length : 0;

    return {
      avgFetchTime: average(fetchTimes),
      avgNotifyTime: average(notifyTimes),
      successRate: totalOperations > 0 ? successCount / totalOperations : 0
    };
  }
}
// Singleton instance: one ObservabilityManager is constructed when this module
// is evaluated and exported so importers share the same logs and metrics.
export const observability = new ObservabilityManager();
/**
 * Event code constants.
 *
 * String codes intended for the `eventCode` argument of structured log
 * events. Declared `as const`, so each value keeps its literal type.
 */
export const EVENT_CODES = {
  // Fetch lifecycle
  FETCH_START: 'DNP-FETCH-START',
  FETCH_SUCCESS: 'DNP-FETCH-SUCCESS',
  FETCH_FAILURE: 'DNP-FETCH-FAILURE',
  FETCH_RETRY: 'DNP-FETCH-RETRY',

  // Notification lifecycle
  NOTIFY_START: 'DNP-NOTIFY-START',
  NOTIFY_SUCCESS: 'DNP-NOTIFY-SUCCESS',
  NOTIFY_FAILURE: 'DNP-NOTIFY-FAILURE',
  NOTIFY_SKIPPED_TTL: 'DNP-NOTIFY-SKIPPED-TTL',

  // Callback lifecycle and circuit state
  CALLBACK_START: 'DNP-CB-START',
  CALLBACK_SUCCESS: 'DNP-CB-SUCCESS',
  CALLBACK_FAILURE: 'DNP-CB-FAILURE',
  CALLBACK_RETRY: 'DNP-CB-RETRY',
  CALLBACK_CIRCUIT_OPEN: 'DNP-CB-CIRCUIT-OPEN',
  CALLBACK_CIRCUIT_CLOSE: 'DNP-CB-CIRCUIT-CLOSE',

  // Boot and scheduling
  BOOT_RECOVERY: 'DNP-BOOT-RECOVERY',
  SCHEDULE_UPDATE: 'DNP-SCHEDULE-UPDATE',

  // Cache and TTL
  CACHE_HIT: 'DNP-CACHE-HIT',
  CACHE_MISS: 'DNP-CACHE-MISS',
  TTL_EXPIRED: 'DNP-TTL-EXPIRED',

  // Maintenance
  METRICS_RESET: 'DNP-METRICS-RESET',
  LOGS_COMPACTED: 'DNP-LOGS-COMPACTED',
} as const;