/**
 * Telemetry management with cardinality budgets
 *
 * Metrics carry no labels and stay low-cardinality; request-level details
 * (DIDs, request ids, ...) are only ever written to logs, after redaction.
 */

import { TelemetryMetrics, TelemetryLogs } from './types';
import { hashDid, redactPii } from './validation';

/** Log severities, ordered from most to least verbose. */
type LogLevel = 'DEBUG' | 'INFO' | 'WARN' | 'ERROR';

/** Numeric rank per severity; a message is emitted when its rank >= the configured rank. */
const LOG_LEVEL_RANK: Record<LogLevel, number> = { DEBUG: 0, INFO: 1, WARN: 2, ERROR: 3 };

/** Monotonically increasing in-memory counter. */
interface CounterMetric {
  name: string;
  help: string;
  type: 'counter';
  value: number;
  inc: (amount?: number) => void;
}

/** In-memory gauge holding the last value that was set. */
interface GaugeMetric {
  name: string;
  help: string;
  type: 'gauge';
  value: number;
  set: (value: number) => void;
}

/**
 * In-memory histogram.
 *
 * `values[i]` is the number of observations that landed in bucket `i`
 * (NOT cumulative); the extra last slot is the +Inf bucket. `sum` is the
 * total of all observed values.
 */
interface HistogramMetric {
  name: string;
  help: string;
  type: 'histogram';
  buckets: number[];
  values: number[];
  sum: number;
  observe: (value: number) => void;
}

type Metric = CounterMetric | GaugeMetric | HistogramMetric;

/** Label keys that identify a single request/user and must never become metric labels. */
const HIGH_CARDINALITY_KEY_PATTERNS: readonly RegExp[] = [
  /requestId/i,
  /activeDid/i,
  /jwtId/i,
  /userId/i,
  /sessionId/i,
  /traceId/i,
  /spanId/i,
];

/** Label values longer than this are treated as high-cardinality. */
const MAX_LABEL_VALUE_LENGTH = 50;

/** Matches values that look like a hex digest (32 or more lowercase hex characters). */
const HEX_DIGEST_PATTERN = /^[a-f0-9]{32,}$/;

/**
 * Converts a "record N events" count into the number of increments to apply.
 *
 * Positive finite counts are rounded up (2.5 -> 3); zero, negative, NaN and
 * infinite counts yield 0, so an infinite count can never stall the caller.
 */
function toIncrementCount(count: number): number {
  return Number.isFinite(count) && count > 0 ? Math.ceil(count) : 0;
}

/**
 * Owns the fixed set of `starred_projects_*` metrics and the level-filtered,
 * PII-redacted console logging that goes with them.
 */
export class TelemetryManager {
  private readonly metrics = new Map<string, Metric>();
  private readonly logLevel: LogLevel;

  /**
   * @param logLevel Minimum severity that will be written to the console.
   */
  constructor(logLevel: 'DEBUG' | 'INFO' | 'WARN' | 'ERROR' = 'INFO') {
    this.logLevel = logLevel;
    this.registerMetrics();
  }

  /** Registers every metric this manager knows about, all starting at zero. */
  private registerMetrics(): void {
    // Counter metrics (low cardinality)
    const counters: ReadonlyArray<readonly [string, string]> = [
      ['starred_projects_poll_attempts_total', 'Total number of polling attempts'],
      ['starred_projects_poll_success_total', 'Total number of successful polls'],
      ['starred_projects_poll_failure_total', 'Total number of failed polls'],
      ['starred_projects_changes_found_total', 'Total number of changes found'],
      ['starred_projects_notifications_generated_total', 'Total notifications generated'],
      ['starred_projects_error_total', 'Total number of errors'],
      ['starred_projects_rate_limit_total', 'Total number of rate limit hits'],
    ];
    for (const [name, help] of counters) {
      this.metrics.set(name, this.createCounter(name, help));
    }

    // Histogram metrics (low cardinality)
    this.metrics.set(
      'starred_projects_poll_duration_seconds',
      this.createHistogram(
        'starred_projects_poll_duration_seconds',
        'Polling duration in seconds',
        [0.1, 0.5, 1, 2, 5, 10, 30],
      ),
    );
    this.metrics.set(
      'starred_projects_api_latency_seconds',
      this.createHistogram(
        'starred_projects_api_latency_seconds',
        'API latency in seconds',
        [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
      ),
    );

    // Gauge metrics (low cardinality)
    const gauges: ReadonlyArray<readonly [string, string]> = [
      ['starred_projects_outbox_size', 'Current number of undelivered notifications'],
      ['starred_projects_outbox_backpressure_active', 'Backpressure active (0/1)'],
      ['starred_projects_api_throughput_rps', 'API throughput in requests per second'],
    ];
    for (const [name, help] of gauges) {
      this.metrics.set(name, this.createGauge(name, help));
    }
  }

  /** Builds a mock counter; `inc` adds `amount` (default 1) to `value`. */
  private createCounter(name: string, help: string): CounterMetric {
    const counter: CounterMetric = {
      name,
      help,
      type: 'counter',
      value: 0,
      inc: (amount = 1) => {
        counter.value += amount;
      },
    };
    return counter;
  }

  /**
   * Builds a mock histogram.
   *
   * @param buckets Upper bounds (inclusive) of the buckets; the first bound
   *   that is >= the observed value receives the observation, so they are
   *   expected to be in ascending order.
   */
  private createHistogram(name: string, help: string, buckets: number[]): HistogramMetric {
    const histogram: HistogramMetric = {
      name,
      help,
      type: 'histogram',
      buckets,
      values: new Array<number>(buckets.length + 1).fill(0),
      sum: 0,
      observe: (value: number) => {
        const firstFit = buckets.findIndex((upperBound) => value <= upperBound);
        // No finite bucket fits -> the trailing +Inf slot.
        const slot = firstFit === -1 ? buckets.length : firstFit;
        histogram.values[slot] = (histogram.values[slot] ?? 0) + 1;
        histogram.sum += value;
      },
    };
    return histogram;
  }

  /** Builds a mock gauge; `set` overwrites `value`. */
  private createGauge(name: string, help: string): GaugeMetric {
    const gauge: GaugeMetric = {
      name,
      help,
      type: 'gauge',
      value: 0,
      set: (value: number) => {
        gauge.value = value;
      },
    };
    return gauge;
  }

  /** Adds `amount` to the named counter; does nothing if it is not a registered counter. */
  private incrementCounter(name: string, amount = 1): void {
    const metric = this.metrics.get(name);
    if (metric?.type === 'counter') {
      metric.inc(amount);
    }
  }

  /** Records `value` in the named histogram; does nothing if it is not a registered histogram. */
  private observeHistogram(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric?.type === 'histogram') {
      metric.observe(value);
    }
  }

  /** Sets the named gauge; does nothing if it is not a registered gauge. */
  private setGauge(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric?.type === 'gauge') {
      metric.set(value);
    }
  }

  // Low-cardinality metric recording

  /** Counts one polling attempt. */
  recordPollAttempt(): void {
    this.incrementCounter('starred_projects_poll_attempts_total');
  }

  /** Counts one successful poll and records how long it took. */
  recordPollSuccess(durationSeconds: number): void {
    this.incrementCounter('starred_projects_poll_success_total');
    this.observeHistogram('starred_projects_poll_duration_seconds', durationSeconds);
  }

  /** Counts one failed poll. */
  recordPollFailure(): void {
    this.incrementCounter('starred_projects_poll_failure_total');
  }

  /**
   * Adds `count` to the changes-found counter.
   * Fractional counts round up; non-positive or non-finite counts add nothing.
   */
  recordChangesFound(count: number): void {
    this.incrementCounter('starred_projects_changes_found_total', toIncrementCount(count));
  }

  /**
   * Adds `count` to the notifications-generated counter.
   * Fractional counts round up; non-positive or non-finite counts add nothing.
   */
  recordNotificationsGenerated(count: number): void {
    this.incrementCounter(
      'starred_projects_notifications_generated_total',
      toIncrementCount(count),
    );
  }

  /** Counts one error. */
  recordError(): void {
    this.incrementCounter('starred_projects_error_total');
  }

  /** Counts one rate-limit hit. */
  recordRateLimit(): void {
    this.incrementCounter('starred_projects_rate_limit_total');
  }

  /** Records one API latency observation, in seconds. */
  recordApiLatency(latencySeconds: number): void {
    this.observeHistogram('starred_projects_api_latency_seconds', latencySeconds);
  }

  /** Sets the outbox-size gauge. */
  recordOutboxSize(size: number): void {
    this.setGauge('starred_projects_outbox_size', size);
  }

  /** Sets the backpressure gauge to 1 when active, 0 otherwise. */
  recordBackpressureActive(active: boolean): void {
    this.setGauge('starred_projects_outbox_backpressure_active', active ? 1 : 0);
  }

  /** Sets the API-throughput gauge. */
  recordApiThroughput(rps: number): void {
    this.setGauge('starred_projects_api_throughput_rps', rps);
  }

  // High-cardinality data (logs only, not metrics)

  /** Logs a polling event at INFO level with the DID hashed and the payload redacted. */
  logPollingEvent(event: TelemetryLogs): void {
    if (!this.shouldLog('INFO')) {
      return;
    }
    const redactedEvent = redactPii({
      ...event,
      activeDid: hashDid(event.activeDid), // Hash for privacy
    });
    console.log('Polling event:', redactedEvent);
  }

  /** Logs an error (message and stack) at ERROR level; `context` is redacted first. */
  logError(error: Error, context?: Record<string, unknown>): void {
    if (!this.shouldLog('ERROR')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.error('Polling error:', {
      message: error.message,
      stack: error.stack,
      context: redactedContext,
    });
  }

  /** Logs a warning at WARN level; `context` is redacted first. */
  logWarning(message: string, context?: Record<string, unknown>): void {
    if (!this.shouldLog('WARN')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.warn('Polling warning:', { message, context: redactedContext });
  }

  /** Logs a debug message at DEBUG level; `context` is redacted first. */
  logDebug(message: string, context?: Record<string, unknown>): void {
    if (!this.shouldLog('DEBUG')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.debug('Polling debug:', { message, context: redactedContext });
  }

  /** True when `level` is at least as severe as the configured log level. */
  private shouldLog(level: LogLevel): boolean {
    return LOG_LEVEL_RANK[level] >= LOG_LEVEL_RANK[this.logLevel];
  }

  /**
   * Get all metrics for export.
   *
   * Returns a name -> current value snapshot. Histograms have no single
   * scalar value, so their entries are present but `undefined`; use
   * {@link getPrometheusMetrics} for bucket data.
   */
  getMetrics(): TelemetryMetrics {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- TelemetryMetrics is declared in another module; the snapshot is cast to it below
    const snapshot: Record<string, any> = {};
    for (const [name, metric] of this.metrics) {
      snapshot[name] = metric.type === 'histogram' ? undefined : metric.value;
    }
    return snapshot as TelemetryMetrics;
  }

  /**
   * Get metrics in Prometheus text exposition format.
   *
   * Histogram buckets are emitted cumulatively (each `le` bucket includes all
   * smaller ones), `le="+Inf"` equals `_count`, and `_sum` carries the total
   * of all observed values.
   */
  getPrometheusMetrics(): string {
    const lines: string[] = [];

    for (const [name, metric] of this.metrics) {
      lines.push(`# HELP ${name} ${metric.help}`);
      lines.push(`# TYPE ${name} ${metric.type}`);

      if (metric.type !== 'histogram') {
        lines.push(`${name} ${metric.value}`);
        continue;
      }

      // Storage is per-bucket; the exposition format wants running totals.
      let cumulative = 0;
      metric.buckets.forEach((upperBound, index) => {
        cumulative += metric.values[index] ?? 0;
        lines.push(`${name}_bucket{le="${upperBound}"} ${cumulative}`);
      });
      cumulative += metric.values[metric.buckets.length] ?? 0;
      lines.push(`${name}_bucket{le="+Inf"} ${cumulative}`);
      lines.push(`${name}_sum ${metric.sum}`);
      lines.push(`${name}_count ${cumulative}`);
    }

    return lines.length > 0 ? `${lines.join('\n')}\n` : '';
  }
}

/**
 * Lint rule to prevent high-cardinality labels in metrics
 *
 * @param labels Candidate metric labels; values are compared in their string form.
 * @throws Error when a label key matches a request/user identifier pattern, or
 *   when a label value is longer than 50 characters or looks like a hex digest.
 */
export function validateMetricLabels(labels: Record<string, unknown>): void {
  for (const [key, value] of Object.entries(labels)) {
    if (HIGH_CARDINALITY_KEY_PATTERNS.some((pattern) => pattern.test(key))) {
      throw new Error(
        `High-cardinality label detected: ${key}. ` +
          `Use logs for request-level data, not metrics. ` +
          `Consider using a hash or removing the label.`,
      );
    }

    // Check for high-cardinality values. Coerce first so that non-string
    // values (numbers, null, ...) are checked instead of raising a TypeError.
    const text = String(value);
    if (text.length > MAX_LABEL_VALUE_LENGTH || HEX_DIGEST_PATTERN.test(text)) {
      throw new Error(
        `High-cardinality value detected for label ${key}: ${text}. ` +
          `Consider using a hash or removing the label.`,
      );
    }
  }
}

/**
 * Safe metric recording with validation
 *
 * Validates `labels` (when given) and then forwards `value` to the
 * `TelemetryManager` method that matches `metricName`. The labels are only
 * validated; they are not attached to the recorded metric.
 *
 * Note: `starred_projects_poll_duration_seconds` has no case of its own; it is
 * recorded as part of `starred_projects_poll_success_total`, where `value` is
 * the poll duration in seconds.
 *
 * @throws Error when a label fails validation or `metricName` is not handled.
 */
export function recordMetricWithValidation(
  telemetry: TelemetryManager,
  metricName: string,
  value: number,
  labels?: Record<string, unknown>,
): void {
  if (labels) {
    validateMetricLabels(labels);
  }

  // Record metric based on type
  switch (metricName) {
    case 'starred_projects_poll_attempts_total':
      telemetry.recordPollAttempt();
      break;
    case 'starred_projects_poll_success_total':
      telemetry.recordPollSuccess(value);
      break;
    case 'starred_projects_poll_failure_total':
      telemetry.recordPollFailure();
      break;
    case 'starred_projects_changes_found_total':
      telemetry.recordChangesFound(value);
      break;
    case 'starred_projects_notifications_generated_total':
      telemetry.recordNotificationsGenerated(value);
      break;
    case 'starred_projects_error_total':
      telemetry.recordError();
      break;
    case 'starred_projects_rate_limit_total':
      telemetry.recordRateLimit();
      break;
    case 'starred_projects_api_latency_seconds':
      telemetry.recordApiLatency(value);
      break;
    case 'starred_projects_outbox_size':
      telemetry.recordOutboxSize(value);
      break;
    case 'starred_projects_outbox_backpressure_active':
      telemetry.recordBackpressureActive(value > 0);
      break;
    case 'starred_projects_api_throughput_rps':
      telemetry.recordApiThroughput(value);
      break;
    default:
      throw new Error(`Unknown metric: ${metricName}`);
  }
}