You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

313 lines
10 KiB

/**
* Telemetry management with cardinality budgets
*/
import { TelemetryMetrics, TelemetryLogs } from './types';
import { hashDid, redactPii } from './validation';
/** Log severities, listed from most to least verbose. */
type LogLevel = 'DEBUG' | 'INFO' | 'WARN' | 'ERROR';

/** Fields shared by every in-memory metric. */
interface MetricBase {
  readonly name: string;
  readonly help: string;
}

/** A running total that only ever grows. */
interface CounterMetric extends MetricBase {
  readonly type: 'counter';
  value: number;
  inc(amount?: number): void;
}

/** A single value that is overwritten on every update. */
interface GaugeMetric extends MetricBase {
  readonly type: 'gauge';
  value: number;
  set(value: number): void;
}

/** A bucketed distribution of observed values. */
interface HistogramMetric extends MetricBase {
  readonly type: 'histogram';
  /** Upper bounds of the finite buckets, expected in ascending order. */
  readonly buckets: readonly number[];
  /** Per-bucket (non-cumulative) counts; the final slot is the +Inf overflow bucket. */
  readonly values: number[];
  /** Sum of every accepted observation. */
  sum: number;
  observe(value: number): void;
}

type Metric = CounterMetric | GaugeMetric | HistogramMetric;

/**
 * In-memory (mock) telemetry registry for the starred-projects poller.
 *
 * Metrics are a fixed, label-free set registered at construction time.
 * Request-level detail goes to the console log methods instead, where the
 * payload is passed through `redactPii` (and `hashDid` for the active DID)
 * before being written.
 */
export class TelemetryManager {
  /** Numeric rank of each level; a message is emitted when its rank is >= the configured one. */
  private static readonly LEVEL_RANK: Readonly<Record<LogLevel, number>> = {
    DEBUG: 0,
    INFO: 1,
    WARN: 2,
    ERROR: 3,
  };

  /** Registered metrics keyed by name; Map insertion order is the export order. */
  private readonly metrics = new Map<string, Metric>();
  private readonly logLevel: LogLevel;

  /**
   * @param logLevel Minimum severity that will be written to the console.
   */
  constructor(logLevel: 'DEBUG' | 'INFO' | 'WARN' | 'ERROR' = 'INFO') {
    this.logLevel = logLevel;
    this.registerMetrics();
  }

  /** Registers the complete, fixed metric set. */
  private registerMetrics(): void {
    // Counter metrics (low cardinality)
    this.register(this.createCounter('starred_projects_poll_attempts_total', 'Total number of polling attempts'));
    this.register(this.createCounter('starred_projects_poll_success_total', 'Total number of successful polls'));
    this.register(this.createCounter('starred_projects_poll_failure_total', 'Total number of failed polls'));
    this.register(this.createCounter('starred_projects_changes_found_total', 'Total number of changes found'));
    this.register(
      this.createCounter('starred_projects_notifications_generated_total', 'Total notifications generated'),
    );
    this.register(this.createCounter('starred_projects_error_total', 'Total number of errors'));
    this.register(this.createCounter('starred_projects_rate_limit_total', 'Total number of rate limit hits'));

    // Histogram metrics (low cardinality)
    this.register(
      this.createHistogram(
        'starred_projects_poll_duration_seconds',
        'Polling duration in seconds',
        [0.1, 0.5, 1, 2, 5, 10, 30],
      ),
    );
    this.register(
      this.createHistogram(
        'starred_projects_api_latency_seconds',
        'API latency in seconds',
        [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
      ),
    );

    // Gauge metrics (low cardinality)
    this.register(this.createGauge('starred_projects_outbox_size', 'Current number of undelivered notifications'));
    this.register(this.createGauge('starred_projects_outbox_backpressure_active', 'Backpressure active (0/1)'));
    this.register(this.createGauge('starred_projects_api_throughput_rps', 'API throughput in requests per second'));
  }

  /** Stores a metric under its own name. */
  private register(metric: Metric): void {
    this.metrics.set(metric.name, metric);
  }

  /** Builds a mock counter starting at zero. */
  private createCounter(name: string, help: string): CounterMetric {
    const counter: CounterMetric = {
      name,
      help,
      type: 'counter',
      value: 0,
      inc: (amount = 1) => {
        // Counters only grow: NaN, infinities, zero and negative amounts are ignored.
        if (Number.isFinite(amount) && amount > 0) {
          counter.value += amount;
        }
      },
    };
    return counter;
  }

  /** Builds a mock histogram with one slot per finite bucket plus a +Inf overflow slot. */
  private createHistogram(name: string, help: string, buckets: number[]): HistogramMetric {
    const upperBounds = [...buckets];
    const histogram: HistogramMetric = {
      name,
      help,
      type: 'histogram',
      buckets: upperBounds,
      values: new Array<number>(upperBounds.length + 1).fill(0),
      sum: 0,
      observe: (value: number) => {
        // NaN compares false against every bound and would turn the sum into NaN for good.
        if (Number.isNaN(value)) {
          return;
        }
        histogram.sum += value;
        // First bucket whose bound covers the value; no match means the +Inf slot.
        const match = upperBounds.findIndex((upperBound) => value <= upperBound);
        const slot = match === -1 ? upperBounds.length : match;
        histogram.values[slot] = (histogram.values[slot] ?? 0) + 1;
      },
    };
    return histogram;
  }

  /** Builds a mock gauge starting at zero. */
  private createGauge(name: string, help: string): GaugeMetric {
    const gauge: GaugeMetric = {
      name,
      help,
      type: 'gauge',
      value: 0,
      set: (value: number) => {
        gauge.value = value;
      },
    };
    return gauge;
  }

  /** Adds `amount` to the named counter; a missing or non-counter metric is a no-op. */
  private incrementCounter(name: string, amount = 1): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'counter') {
      metric.inc(amount);
    }
  }

  /** Records one observation on the named histogram; a missing or non-histogram metric is a no-op. */
  private observeHistogram(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'histogram') {
      metric.observe(value);
    }
  }

  /** Overwrites the named gauge; a missing or non-gauge metric is a no-op. */
  private setGauge(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'gauge') {
      metric.set(value);
    }
  }

  // Low-cardinality metric recording

  /** Counts one polling attempt. */
  recordPollAttempt(): void {
    this.incrementCounter('starred_projects_poll_attempts_total');
  }

  /**
   * Counts one successful poll and records how long it took.
   * @param durationSeconds Poll duration in seconds.
   */
  recordPollSuccess(durationSeconds: number): void {
    this.incrementCounter('starred_projects_poll_success_total');
    this.observeHistogram('starred_projects_poll_duration_seconds', durationSeconds);
  }

  /** Counts one failed poll. */
  recordPollFailure(): void {
    this.incrementCounter('starred_projects_poll_failure_total');
  }

  /**
   * Adds `count` to the changes-found total in a single step.
   * @param count Number of changes; fractional values round up, and
   *   non-finite or non-positive values are ignored.
   */
  recordChangesFound(count: number): void {
    // Math.ceil keeps the totals the earlier one-increment-per-item loop produced.
    this.incrementCounter('starred_projects_changes_found_total', Math.ceil(count));
  }

  /**
   * Adds `count` to the notifications-generated total in a single step.
   * @param count Number of notifications; fractional values round up, and
   *   non-finite or non-positive values are ignored.
   */
  recordNotificationsGenerated(count: number): void {
    this.incrementCounter('starred_projects_notifications_generated_total', Math.ceil(count));
  }

  /** Counts one error. */
  recordError(): void {
    this.incrementCounter('starred_projects_error_total');
  }

  /** Counts one rate-limit hit. */
  recordRateLimit(): void {
    this.incrementCounter('starred_projects_rate_limit_total');
  }

  /**
   * Records one API latency observation.
   * @param latencySeconds Latency in seconds.
   */
  recordApiLatency(latencySeconds: number): void {
    this.observeHistogram('starred_projects_api_latency_seconds', latencySeconds);
  }

  /** Sets the outbox-size gauge. */
  recordOutboxSize(size: number): void {
    this.setGauge('starred_projects_outbox_size', size);
  }

  /** Sets the backpressure gauge to 1 when active, 0 otherwise. */
  recordBackpressureActive(active: boolean): void {
    this.setGauge('starred_projects_outbox_backpressure_active', active ? 1 : 0);
  }

  /** Sets the API throughput gauge. */
  recordApiThroughput(rps: number): void {
    this.setGauge('starred_projects_api_throughput_rps', rps);
  }

  // High-cardinality data (logs only, not metrics)

  /** Logs a polling event at INFO level with the active DID hashed and the payload redacted. */
  logPollingEvent(event: TelemetryLogs): void {
    if (this.shouldLog('INFO')) {
      const redactedEvent = redactPii({
        ...event,
        activeDid: hashDid(event.activeDid), // Hash for privacy
      });
      console.log('Polling event:', redactedEvent);
    }
  }

  /** Logs an error's message and stack at ERROR level, with the optional context redacted. */
  logError(error: Error, context?: Record<string, any>): void {
    if (this.shouldLog('ERROR')) {
      const redactedContext = context ? redactPii(context) : undefined;
      console.error('Polling error:', {
        message: error.message,
        stack: error.stack,
        context: redactedContext,
      });
    }
  }

  /** Logs a warning at WARN level, with the optional context redacted. */
  logWarning(message: string, context?: Record<string, any>): void {
    if (this.shouldLog('WARN')) {
      const redactedContext = context ? redactPii(context) : undefined;
      console.warn('Polling warning:', { message, context: redactedContext });
    }
  }

  /** Logs a debug message at DEBUG level, with the optional context redacted. */
  logDebug(message: string, context?: Record<string, any>): void {
    if (this.shouldLog('DEBUG')) {
      const redactedContext = context ? redactPii(context) : undefined;
      console.debug('Polling debug:', { message, context: redactedContext });
    }
  }

  /** Returns true when `level` is at least as severe as the configured log level. */
  private shouldLog(level: LogLevel): boolean {
    return TelemetryManager.LEVEL_RANK[level] >= TelemetryManager.LEVEL_RANK[this.logLevel];
  }

  /**
   * Get all metrics for export.
   *
   * Counters and gauges map to their current number. Histograms have no single
   * scalar value, so their keys are present with `undefined`.
   * NOTE(review): the cast to TelemetryMetrics is unchecked; confirm the
   * declared shape tolerates the undefined histogram entries.
   */
  getMetrics(): TelemetryMetrics {
    const snapshot: Record<string, unknown> = {};
    for (const [name, metric] of this.metrics) {
      snapshot[name] = metric.type === 'histogram' ? undefined : metric.value;
    }
    return snapshot as TelemetryMetrics;
  }

  /**
   * Get metrics in Prometheus text format.
   *
   * Each metric gets `# HELP` and `# TYPE` lines. Histogram `_bucket` samples
   * are cumulative (each `le` bucket includes every smaller one), followed by
   * `_sum` and `_count`. Every line ends with a newline.
   */
  getPrometheusMetrics(): string {
    const lines: string[] = [];
    for (const [name, metric] of this.metrics) {
      lines.push(`# HELP ${name} ${metric.help}`);
      lines.push(`# TYPE ${name} ${metric.type}`);
      if (metric.type === 'histogram') {
        // Counts are stored per bucket; accumulate them into the cumulative form on export.
        let cumulative = 0;
        for (let i = 0; i < metric.buckets.length; i++) {
          cumulative += metric.values[i] ?? 0;
          lines.push(`${name}_bucket{le="${metric.buckets[i]}"} ${cumulative}`);
        }
        cumulative += metric.values[metric.buckets.length] ?? 0;
        lines.push(`${name}_bucket{le="+Inf"} ${cumulative}`);
        lines.push(`${name}_sum ${metric.sum}`);
        lines.push(`${name}_count ${cumulative}`);
      } else {
        lines.push(`${name} ${metric.value}`);
      }
    }
    return lines.map((line) => `${line}\n`).join('');
  }
}
/**
 * Label-name fragments that identify per-request or per-user data.
 * The optional `_`/`-` separator lets snake_case and kebab-case names
 * (`user_id`, `trace-id`) match as well as camelCase ones (`userId`).
 */
const HIGH_CARDINALITY_KEY_PATTERNS: readonly RegExp[] = [
  /request[_-]?id/i,
  /active[_-]?did/i,
  /jwt[_-]?id/i,
  /user[_-]?id/i,
  /session[_-]?id/i,
  /trace[_-]?id/i,
  /span[_-]?id/i,
];

/** A value made only of 32 or more hex digits, in either letter case. */
const HEX_DIGEST_PATTERN = /^[a-f0-9]{32,}$/i;

/** Label values longer than this are rejected as high-cardinality. */
const MAX_LABEL_VALUE_LENGTH = 50;

/**
 * Lint rule to prevent high-cardinality labels in metrics.
 *
 * Labels are checked in iteration order; for each one the key is checked
 * before the value, and the first violation throws.
 *
 * @param labels Label name/value pairs to check.
 * @throws Error when a label name contains a known identifier fragment
 *   (request/user/session/trace/span/jwt id, active DID).
 * @throws Error when a label value is longer than 50 characters or consists
 *   solely of 32 or more hex digits.
 */
export function validateMetricLabels(labels: Record<string, string>): void {
  for (const [key, value] of Object.entries(labels)) {
    if (HIGH_CARDINALITY_KEY_PATTERNS.some((pattern) => pattern.test(key))) {
      throw new Error(
        `High-cardinality label detected: ${key}. ` +
          `Use logs for request-level data, not metrics. ` +
          `Consider using a hash or removing the label.`,
      );
    }

    // Check for high-cardinality values
    if (value.length > MAX_LABEL_VALUE_LENGTH || HEX_DIGEST_PATTERN.test(value)) {
      throw new Error(
        `High-cardinality value detected for label ${key}: ${value}. ` +
          `Consider using a hash or removing the label.`,
      );
    }
  }
}
/**
 * Routes each supported metric name to the TelemetryManager call that records it.
 * Handlers for plain counters ignore the numeric value.
 */
const METRIC_RECORDERS: ReadonlyMap<string, (telemetry: TelemetryManager, value: number) => void> = new Map<
  string,
  (telemetry: TelemetryManager, value: number) => void
>([
  ['starred_projects_poll_attempts_total', (telemetry) => telemetry.recordPollAttempt()],
  ['starred_projects_poll_success_total', (telemetry, value) => telemetry.recordPollSuccess(value)],
  ['starred_projects_poll_failure_total', (telemetry) => telemetry.recordPollFailure()],
  ['starred_projects_changes_found_total', (telemetry, value) => telemetry.recordChangesFound(value)],
  [
    'starred_projects_notifications_generated_total',
    (telemetry, value) => telemetry.recordNotificationsGenerated(value),
  ],
  ['starred_projects_error_total', (telemetry) => telemetry.recordError()],
  ['starred_projects_rate_limit_total', (telemetry) => telemetry.recordRateLimit()],
  ['starred_projects_api_latency_seconds', (telemetry, value) => telemetry.recordApiLatency(value)],
  ['starred_projects_outbox_size', (telemetry, value) => telemetry.recordOutboxSize(value)],
  [
    'starred_projects_outbox_backpressure_active',
    // Any strictly positive value switches backpressure on.
    (telemetry, value) => telemetry.recordBackpressureActive(value > 0),
  ],
  ['starred_projects_api_throughput_rps', (telemetry, value) => telemetry.recordApiThroughput(value)],
]);

/**
 * Records a metric by name after validating any labels supplied with it.
 *
 * Labels, when given, are only validated; they are not attached to the metric.
 * Validation runs before the metric name is looked up.
 *
 * @param telemetry Manager that receives the recording call.
 * @param metricName Name of the metric to record.
 * @param value Value forwarded to recorders that take one.
 * @param labels Optional labels to run through `validateMetricLabels`.
 * @throws Error whatever `validateMetricLabels` throws for the given labels.
 * @throws Error when `metricName` is not one of the supported names.
 */
export function recordMetricWithValidation(
  telemetry: TelemetryManager,
  metricName: string,
  value: number,
  labels?: Record<string, string>
): void {
  if (labels) {
    validateMetricLabels(labels);
  }

  const record = METRIC_RECORDERS.get(metricName);
  if (record === undefined) {
    throw new Error(`Unknown metric: ${metricName}`);
  }
  record(telemetry, value);
}