You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
311 lines
8.4 KiB
311 lines
8.4 KiB
/**
 * Observability & Health Monitoring Implementation
 * Provides structured logging, event codes, and health monitoring
 *
 * @author Matthew Raymer
 * @version 1.1.0
 */
|
|
|
|
/**
 * Point-in-time health snapshot, as returned by
 * `ObservabilityManager.getHealthStatus()`.
 */
export interface HealthStatus {
  /** Upcoming run times, expressed as epoch-millisecond timestamps. */
  nextRuns: number[];

  /** Event codes of the most recent fetch/notify events, newest first. */
  lastOutcomes: string[];

  /** Age of the cache in milliseconds, or `null` when it is not known. */
  cacheAgeMs: number | null;

  /** Whether the cache is considered stale. */
  staleArmed: boolean;

  /** Number of recent queue/retry related events. */
  queueDepth: number;

  /** Aggregate circuit-breaker counters. */
  circuitBreakers: {
    /** Total number of circuit breakers. */
    total: number;
    /** How many of them are currently open. */
    open: number;
    /** Failure count reported for the breakers. */
    failures: number;
  };

  /** Aggregated timing and outcome figures. */
  performance: {
    /** Arithmetic mean of the retained fetch durations (0 when none). */
    avgFetchTime: number;
    /** Arithmetic mean of the retained notify durations (0 when none). */
    avgNotifyTime: number;
    /** Successes divided by total recorded operations, in the range 0..1. */
    successRate: number;
  };
}
|
|
|
|
/**
 * A single structured log entry kept by `ObservabilityManager`.
 */
export interface EventLog {
  /** Identifier assigned to the entry when it is logged. */
  id: string;

  /** Creation time as an epoch-millisecond timestamp. */
  timestamp: number;

  /** Severity; also selects which console method is used for output. */
  level: 'INFO' | 'WARN' | 'ERROR';

  /** Machine-readable event code, e.g. `DNP-FETCH-SUCCESS`. */
  eventCode: string;

  /** Human-readable description of the event. */
  message: string;

  /** Optional structured payload attached to the event. */
  data?: Record<string, unknown>;

  /** Optional duration of the operation, reported in milliseconds. */
  duration?: number;
}
|
|
|
|
/**
 * Raw performance samples and outcome counters collected by
 * `ObservabilityManager.recordMetric()`.
 *
 * NOTE(review): durations are presumably milliseconds, matching
 * `EventLog.duration` - confirm against callers.
 */
export interface PerformanceMetrics {
  /** Retained durations of fetch operations, oldest first. */
  fetchTimes: number[];

  /** Retained durations of notify operations, oldest first. */
  notifyTimes: number[];

  /** Retained durations of callback operations, oldest first. */
  callbackTimes: number[];

  /** Number of operations recorded as successful since the last reset. */
  successCount: number;

  /** Number of operations recorded as failed since the last reset. */
  failureCount: number;

  /** Epoch-millisecond timestamp of the last reset (or of creation). */
  lastReset: number;
}
|
|
|
|
/**
 * Observability Manager
 *
 * Keeps an in-memory, newest-first list of structured event logs and a
 * bounded set of performance samples, and derives a health snapshot from
 * them. Every logged event is also echoed to the console.
 *
 * Several health inputs (next runs, cache age, circuit breaker status) are
 * still mock values; see the private helpers at the bottom of the class.
 */
export class ObservabilityManager {
  /** Logged events, newest first, capped at `maxLogs` entries. */
  private eventLogs: EventLog[] = [];

  /** Raw samples and counters fed by `recordMetric()`. */
  private performanceMetrics: PerformanceMetrics = {
    fetchTimes: [],
    notifyTimes: [],
    callbackTimes: [],
    successCount: 0,
    failureCount: 0,
    lastReset: Date.now()
  };

  /** Maximum number of event logs retained in memory. */
  private readonly maxLogs = 1000;

  /** Maximum number of samples retained per metric type. */
  private readonly maxMetrics = 100;

  /**
   * Log a structured event with an event code.
   *
   * The event is stored at the front of the in-memory log (oldest entries
   * are dropped beyond `maxLogs`) and written to the console using the
   * method matching `level`.
   *
   * @param level - Severity of the event.
   * @param eventCode - Machine-readable code, e.g. `DNP-FETCH-SUCCESS`.
   * @param message - Human-readable description.
   * @param data - Optional structured payload; appended to the console line as JSON.
   * @param duration - Optional duration in milliseconds; `0` is a valid value.
   */
  logEvent(
    level: 'INFO' | 'WARN' | 'ERROR',
    eventCode: string,
    message: string,
    data?: Record<string, unknown>,
    duration?: number
  ): void {
    const event: EventLog = {
      id: this.generateEventId(),
      timestamp: Date.now(),
      level,
      eventCode,
      message,
      data,
      duration
    };

    this.eventLogs.unshift(event);

    // Keep only recent logs
    if (this.eventLogs.length > this.maxLogs) {
      this.eventLogs = this.eventLogs.slice(0, this.maxLogs);
    }

    // Console output with structured format
    const logMessage = `[${eventCode}] ${message}`;
    const logData = data ? ` | Data: ${this.safeStringify(data)}` : '';
    // Compare against undefined (not truthiness) so a 0 ms duration is still reported.
    const logDuration = duration !== undefined ? ` | Duration: ${duration}ms` : '';
    const line = logMessage + logData + logDuration;

    switch (level) {
      case 'INFO':
        console.log(line);
        break;
      case 'WARN':
        console.warn(line);
        break;
      case 'ERROR':
        console.error(line);
        break;
    }
  }

  /**
   * Record a performance sample.
   *
   * @param type - Which sample list the duration is appended to.
   * @param duration - Measured duration of the operation.
   * @param success - Whether the operation succeeded; updates the success/failure counters.
   */
  recordMetric(type: 'fetch' | 'notify' | 'callback', duration: number, success: boolean): void {
    switch (type) {
      case 'fetch':
        this.performanceMetrics.fetchTimes.push(duration);
        break;
      case 'notify':
        this.performanceMetrics.notifyTimes.push(duration);
        break;
      case 'callback':
        this.performanceMetrics.callbackTimes.push(duration);
        break;
    }

    if (success) {
      this.performanceMetrics.successCount++;
    } else {
      this.performanceMetrics.failureCount++;
    }

    // Keep only recent metrics
    this.trimMetrics();
  }

  /**
   * Build a health snapshot from the logs of the last 24 hours, the
   * recorded metrics and the (currently mocked) scheduler, cache and
   * circuit-breaker inputs.
   *
   * @returns The assembled health status.
   */
  async getHealthStatus(): Promise<HealthStatus> {
    const dayMs = 24 * 60 * 60 * 1000;
    const staleThresholdMs = 60 * 60 * 1000; // 1 hour

    const now = Date.now();
    const recentLogs = this.eventLogs.filter(log => now - log.timestamp < dayMs);

    // Calculate next runs (mock implementation)
    const nextRuns = this.calculateNextRuns();

    // Event codes of the ten most recent fetch/notify events
    const lastOutcomes = recentLogs
      .filter(log => log.eventCode.startsWith('DNP-FETCH-') || log.eventCode.startsWith('DNP-NOTIFY-'))
      .slice(0, 10)
      .map(log => log.eventCode);

    // Calculate cache age (mock implementation)
    const cacheAgeMs = this.calculateCacheAge();

    // An unknown age (null) counts as stale; an age of 0 is a fresh cache, not an unknown one.
    const staleArmed = cacheAgeMs === null ? true : cacheAgeMs > staleThresholdMs;

    // Queue depth is approximated by counting recent queue/retry events
    const queueDepth = recentLogs.filter(log =>
      log.eventCode.includes('QUEUE') || log.eventCode.includes('RETRY')
    ).length;

    // Circuit breaker status
    const circuitBreakers = this.getCircuitBreakerStatus();

    // Performance metrics
    const performance = this.calculatePerformanceMetrics();

    return {
      nextRuns,
      lastOutcomes,
      cacheAgeMs,
      staleArmed,
      queueDepth,
      circuitBreakers,
      performance
    };
  }

  /**
   * Get the most recent event logs, newest first.
   *
   * @param limit - Maximum number of entries to return; negative values yield an empty list.
   * @returns A new array containing at most `limit` entries.
   */
  getRecentLogs(limit: number = 50): EventLog[] {
    // Clamp so a negative limit does not turn into "everything but the last n" via slice().
    return this.eventLogs.slice(0, Math.max(0, limit));
  }

  /**
   * Get a snapshot of the raw performance metrics.
   *
   * @returns A copy whose arrays are detached from internal state, so callers
   * may mutate the result freely.
   */
  getPerformanceMetrics(): PerformanceMetrics {
    const metrics = this.performanceMetrics;
    return {
      ...metrics,
      fetchTimes: [...metrics.fetchTimes],
      notifyTimes: [...metrics.notifyTimes],
      callbackTimes: [...metrics.callbackTimes]
    };
  }

  /**
   * Reset all performance samples and counters, stamp `lastReset` with the
   * current time, and log a `DNP-METRICS-RESET` event.
   */
  resetMetrics(): void {
    this.performanceMetrics = {
      fetchTimes: [],
      notifyTimes: [],
      callbackTimes: [],
      successCount: 0,
      failureCount: 0,
      lastReset: Date.now()
    };

    this.logEvent('INFO', 'DNP-METRICS-RESET', 'Performance metrics reset');
  }

  /**
   * Drop logs older than the given age (called by cleanup job).
   *
   * When at least one entry is removed, a `DNP-LOGS-COMPACTED` event is
   * logged afterwards.
   *
   * @param olderThanMs - Maximum age in milliseconds to keep; defaults to 30 days.
   * @returns Number of log entries removed.
   */
  compactLogs(olderThanMs: number = 30 * 24 * 60 * 60 * 1000): number {
    const cutoff = Date.now() - olderThanMs;
    const initialCount = this.eventLogs.length;

    this.eventLogs = this.eventLogs.filter(log => log.timestamp >= cutoff);

    const removedCount = initialCount - this.eventLogs.length;
    if (removedCount > 0) {
      this.logEvent('INFO', 'DNP-LOGS-COMPACTED', `Removed ${removedCount} old logs`);
    }

    return removedCount;
  }

  // Private helper methods

  /** Build an id of the form `evt_<epoch ms>_<random base-36 suffix>`. */
  private generateEventId(): string {
    // slice(2, 11) takes the same nine characters as the deprecated substr(2, 9).
    return `evt_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** Serialise log data for console output without letting logging throw. */
  private safeStringify(data: Record<string, unknown>): string {
    try {
      return JSON.stringify(data);
    } catch {
      // JSON.stringify throws on circular structures and BigInt values.
      return '[unserializable data]';
    }
  }

  /** Keep only the newest `maxMetrics` samples of each metric type. */
  private trimMetrics(): void {
    const metrics = this.performanceMetrics;
    for (const key of ['fetchTimes', 'notifyTimes', 'callbackTimes'] as const) {
      if (metrics[key].length > this.maxMetrics) {
        metrics[key] = metrics[key].slice(-this.maxMetrics);
      }
    }
  }

  /** Mock implementation - would calculate from actual schedules. */
  private calculateNextRuns(): number[] {
    const now = Date.now();
    return [
      now + (60 * 60 * 1000), // 1 hour from now
      now + (24 * 60 * 60 * 1000) // 24 hours from now
    ];
  }

  /** Mock implementation - would get from actual cache. */
  private calculateCacheAge(): number | null {
    return 1800000; // 30 minutes
  }

  /** Mock implementation - would get from actual circuit breakers. */
  private getCircuitBreakerStatus(): { total: number; open: number; failures: number } {
    return {
      total: 3,
      open: 1,
      failures: 5
    };
  }

  /**
   * Aggregate the raw metrics into averages and a success rate.
   *
   * Averages cover only the retained samples, while the success rate uses
   * the counters, which are not trimmed and so span everything since the
   * last reset. All values are 0 when there is no data.
   */
  private calculatePerformanceMetrics(): {
    avgFetchTime: number;
    avgNotifyTime: number;
    successRate: number;
  } {
    const average = (samples: number[]): number =>
      samples.length > 0 ? samples.reduce((sum, value) => sum + value, 0) / samples.length : 0;

    const { fetchTimes, notifyTimes, successCount, failureCount } = this.performanceMetrics;
    const totalOperations = successCount + failureCount;

    return {
      avgFetchTime: average(fetchTimes),
      avgNotifyTime: average(notifyTimes),
      successRate: totalOperations > 0 ? successCount / totalOperations : 0
    };
  }
}
|
|
|
|
/** Shared module-level instance, created once when this module is loaded. */
export const observability: ObservabilityManager = new ObservabilityManager();
|
|
|
|
/**
 * Event code constants.
 *
 * Every code carries the `DNP-` prefix; `as const` keeps each value typed as
 * its exact string literal.
 */
export const EVENT_CODES = {
  // Fetch lifecycle
  FETCH_START: 'DNP-FETCH-START',
  FETCH_SUCCESS: 'DNP-FETCH-SUCCESS',
  FETCH_FAILURE: 'DNP-FETCH-FAILURE',
  FETCH_RETRY: 'DNP-FETCH-RETRY',

  // Notification lifecycle
  NOTIFY_START: 'DNP-NOTIFY-START',
  NOTIFY_SUCCESS: 'DNP-NOTIFY-SUCCESS',
  NOTIFY_FAILURE: 'DNP-NOTIFY-FAILURE',
  NOTIFY_SKIPPED_TTL: 'DNP-NOTIFY-SKIPPED-TTL',

  // Callback lifecycle and circuit breaker transitions
  CALLBACK_START: 'DNP-CB-START',
  CALLBACK_SUCCESS: 'DNP-CB-SUCCESS',
  CALLBACK_FAILURE: 'DNP-CB-FAILURE',
  CALLBACK_RETRY: 'DNP-CB-RETRY',
  CALLBACK_CIRCUIT_OPEN: 'DNP-CB-CIRCUIT-OPEN',
  CALLBACK_CIRCUIT_CLOSE: 'DNP-CB-CIRCUIT-CLOSE',

  // Boot and scheduling
  BOOT_RECOVERY: 'DNP-BOOT-RECOVERY',
  SCHEDULE_UPDATE: 'DNP-SCHEDULE-UPDATE',

  // Cache and TTL
  CACHE_HIT: 'DNP-CACHE-HIT',
  CACHE_MISS: 'DNP-CACHE-MISS',
  TTL_EXPIRED: 'DNP-TTL-EXPIRED',

  // Maintenance
  METRICS_RESET: 'DNP-METRICS-RESET',
  LOGS_COMPACTED: 'DNP-LOGS-COMPACTED'
} as const;
|
|
|