You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
313 lines
10 KiB
313 lines
10 KiB
/**
 * Telemetry management with cardinality budgets
 */
|
|
|
|
import { TelemetryMetrics, TelemetryLogs } from './types';
|
|
import { hashDid, redactPii } from './validation';
|
|
|
|
/** Log severities accepted by TelemetryManager, listed from most to least verbose. */
type TelemetryLogLevel = 'DEBUG' | 'INFO' | 'WARN' | 'ERROR';

/** In-memory mock of a monotonically increasing counter. */
interface CounterMetric {
  readonly name: string;
  readonly help: string;
  readonly type: 'counter';
  value: number;
  /** Adds `amount` (default 1) to the counter. */
  inc(amount?: number): void;
}

/** In-memory mock of a gauge whose value can be overwritten. */
interface GaugeMetric {
  readonly name: string;
  readonly help: string;
  readonly type: 'gauge';
  value: number;
  set(value: number): void;
}

/** In-memory mock of a histogram with fixed upper bounds. */
interface HistogramMetric {
  readonly name: string;
  readonly help: string;
  readonly type: 'histogram';
  /** Inclusive upper bounds, in the order they were registered. */
  readonly buckets: readonly number[];
  /** Per-bucket (non-cumulative) observation counts; the last slot is the +Inf overflow bucket. */
  readonly values: number[];
  /** Sum of every observed value. */
  sum: number;
  observe(value: number): void;
}

type RegisteredMetric = CounterMetric | GaugeMetric | HistogramMetric;

/**
 * Holds a fixed set of label-free metrics (counters, histograms, gauges) in
 * memory and writes level-filtered log lines to the console.
 *
 * Metrics are registered once in the constructor; the `record*` methods
 * update them, and `getMetrics` / `getPrometheusMetrics` export them.
 * Per-request detail is only ever written to logs, never to metrics.
 */
export class TelemetryManager {
  /** Rank of each level; a message is emitted when its rank is >= the configured level's rank. */
  private static readonly LEVEL_RANK: Record<TelemetryLogLevel, number> = {
    DEBUG: 0,
    INFO: 1,
    WARN: 2,
    ERROR: 3,
  };

  private readonly metrics = new Map<string, RegisteredMetric>();
  private logLevel: TelemetryLogLevel;

  /**
   * @param logLevel Minimum severity that will be written to the console.
   */
  constructor(logLevel: TelemetryLogLevel = 'INFO') {
    this.logLevel = logLevel;
    this.registerMetrics();
  }

  /** Registers every metric this manager exposes, keyed by metric name. */
  private registerMetrics(): void {
    const register = (metric: RegisteredMetric): void => {
      this.metrics.set(metric.name, metric);
    };

    // Counter metrics (low cardinality)
    register(this.createCounter('starred_projects_poll_attempts_total', 'Total number of polling attempts'));
    register(this.createCounter('starred_projects_poll_success_total', 'Total number of successful polls'));
    register(this.createCounter('starred_projects_poll_failure_total', 'Total number of failed polls'));
    register(this.createCounter('starred_projects_changes_found_total', 'Total number of changes found'));
    register(this.createCounter('starred_projects_notifications_generated_total', 'Total notifications generated'));
    register(this.createCounter('starred_projects_error_total', 'Total number of errors'));
    register(this.createCounter('starred_projects_rate_limit_total', 'Total number of rate limit hits'));

    // Histogram metrics (low cardinality)
    register(this.createHistogram('starred_projects_poll_duration_seconds', 'Polling duration in seconds',
      [0.1, 0.5, 1, 2, 5, 10, 30]));
    register(this.createHistogram('starred_projects_api_latency_seconds', 'API latency in seconds',
      [0.05, 0.1, 0.25, 0.5, 1, 2, 5]));

    // Gauge metrics (low cardinality)
    register(this.createGauge('starred_projects_outbox_size', 'Current number of undelivered notifications'));
    register(this.createGauge('starred_projects_outbox_backpressure_active', 'Backpressure active (0/1)'));
    register(this.createGauge('starred_projects_api_throughput_rps', 'API throughput in requests per second'));
  }

  /** Builds a mock counter starting at 0. */
  private createCounter(name: string, help: string): CounterMetric {
    const counter: CounterMetric = {
      name,
      help,
      type: 'counter',
      value: 0,
      inc: (amount = 1) => {
        counter.value += amount;
      },
    };
    return counter;
  }

  /**
   * Builds a mock histogram. `buckets` are inclusive upper bounds; one extra
   * slot is allocated for observations above the largest bound.
   */
  private createHistogram(name: string, help: string, buckets: number[]): HistogramMetric {
    const histogram: HistogramMetric = {
      name,
      help,
      type: 'histogram',
      buckets,
      values: new Array<number>(buckets.length + 1).fill(0),
      sum: 0,
      observe: (value: number) => {
        // NaN compares false against every bound and would turn the sum into NaN, so drop it.
        if (Number.isNaN(value)) {
          return;
        }
        const firstFit = buckets.findIndex((upperBound) => value <= upperBound);
        const slot = firstFit === -1 ? buckets.length : firstFit; // last slot is the +Inf bucket
        histogram.values[slot] = (histogram.values[slot] ?? 0) + 1;
        histogram.sum += value;
      },
    };
    return histogram;
  }

  /** Builds a mock gauge starting at 0. */
  private createGauge(name: string, help: string): GaugeMetric {
    const gauge: GaugeMetric = {
      name,
      help,
      type: 'gauge',
      value: 0,
      set: (value: number) => {
        gauge.value = value;
      },
    };
    return gauge;
  }

  /** Adds `amount` to the named counter; does nothing if no counter has that name. */
  private addToCounter(name: string, amount: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'counter') {
      metric.inc(amount);
    }
  }

  /**
   * Adds a caller-supplied count to the named counter in a single step.
   * Non-finite and non-positive counts are ignored; fractional counts are
   * rounded up, which matches the number of increments the former
   * `for (i = 0; i < count; i++)` loop performed.
   */
  private addCount(name: string, count: number): void {
    if (!Number.isFinite(count) || count <= 0) {
      return;
    }
    this.addToCounter(name, Math.ceil(count));
  }

  /** Records `value` in the named histogram; does nothing if no histogram has that name. */
  private observeHistogram(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'histogram') {
      metric.observe(value);
    }
  }

  /** Overwrites the named gauge; does nothing if no gauge has that name. */
  private setGauge(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'gauge') {
      metric.set(value);
    }
  }

  // Low-cardinality metric recording

  /** Counts one polling attempt. */
  recordPollAttempt(): void {
    this.addToCounter('starred_projects_poll_attempts_total', 1);
  }

  /**
   * Counts one successful poll and records how long it took.
   * @param durationSeconds Poll duration, observed into the poll-duration histogram.
   */
  recordPollSuccess(durationSeconds: number): void {
    this.addToCounter('starred_projects_poll_success_total', 1);
    this.observeHistogram('starred_projects_poll_duration_seconds', durationSeconds);
  }

  /** Counts one failed poll. */
  recordPollFailure(): void {
    this.addToCounter('starred_projects_poll_failure_total', 1);
  }

  /**
   * Adds `count` to the changes-found counter.
   * @param count Number of changes; ignored when non-finite or <= 0.
   */
  recordChangesFound(count: number): void {
    this.addCount('starred_projects_changes_found_total', count);
  }

  /**
   * Adds `count` to the notifications-generated counter.
   * @param count Number of notifications; ignored when non-finite or <= 0.
   */
  recordNotificationsGenerated(count: number): void {
    this.addCount('starred_projects_notifications_generated_total', count);
  }

  /** Counts one error. */
  recordError(): void {
    this.addToCounter('starred_projects_error_total', 1);
  }

  /** Counts one rate-limit hit. */
  recordRateLimit(): void {
    this.addToCounter('starred_projects_rate_limit_total', 1);
  }

  /** Observes one API latency sample, in seconds. */
  recordApiLatency(latencySeconds: number): void {
    this.observeHistogram('starred_projects_api_latency_seconds', latencySeconds);
  }

  /** Sets the outbox-size gauge. */
  recordOutboxSize(size: number): void {
    this.setGauge('starred_projects_outbox_size', size);
  }

  /** Sets the backpressure gauge to 1 when `active`, otherwise 0. */
  recordBackpressureActive(active: boolean): void {
    this.setGauge('starred_projects_outbox_backpressure_active', active ? 1 : 0);
  }

  /** Sets the API-throughput gauge, in requests per second. */
  recordApiThroughput(rps: number): void {
    this.setGauge('starred_projects_api_throughput_rps', rps);
  }

  // High-cardinality data (logs only, not metrics)

  /**
   * Logs a polling event at INFO level. `activeDid` is replaced by
   * `hashDid(activeDid)` and the result is passed through `redactPii`
   * before it reaches the console.
   */
  logPollingEvent(event: TelemetryLogs): void {
    if (!this.shouldLog('INFO')) {
      return;
    }
    const redactedEvent = redactPii({
      ...event,
      activeDid: hashDid(event.activeDid), // Hash for privacy
    });
    console.log('Polling event:', redactedEvent);
  }

  /**
   * Logs an error's message and stack at ERROR level.
   * @param context Optional extra data; passed through `redactPii` before logging.
   */
  logError(error: Error, context?: Record<string, any>): void {
    if (!this.shouldLog('ERROR')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.error('Polling error:', {
      message: error.message,
      stack: error.stack,
      context: redactedContext,
    });
  }

  /**
   * Logs a message at WARN level.
   * @param context Optional extra data; passed through `redactPii` before logging.
   */
  logWarning(message: string, context?: Record<string, any>): void {
    if (!this.shouldLog('WARN')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.warn('Polling warning:', { message, context: redactedContext });
  }

  /**
   * Logs a message at DEBUG level.
   * @param context Optional extra data; passed through `redactPii` before logging.
   */
  logDebug(message: string, context?: Record<string, any>): void {
    if (!this.shouldLog('DEBUG')) {
      return;
    }
    const redactedContext = context ? redactPii(context) : undefined;
    console.debug('Polling debug:', { message, context: redactedContext });
  }

  /** Returns true when `level` is at least as severe as the configured log level. */
  private shouldLog(level: TelemetryLogLevel): boolean {
    return TelemetryManager.LEVEL_RANK[level] >= TelemetryManager.LEVEL_RANK[this.logLevel];
  }

  // Get all metrics for export

  /**
   * Returns a name -> current value snapshot of every registered metric.
   * Histograms have no single `value`, so their entry is `undefined`
   * (unchanged from the previous behaviour).
   * NOTE(review): confirm what TelemetryMetrics expects for histogram entries.
   */
  getMetrics(): TelemetryMetrics {
    const snapshot: Record<string, unknown> = {};
    for (const [name, metric] of this.metrics) {
      snapshot[name] = metric.type === 'histogram' ? undefined : metric.value;
    }
    return snapshot as TelemetryMetrics;
  }

  // Get metrics in Prometheus format

  /**
   * Renders every metric in the Prometheus text exposition format.
   *
   * Histogram `_bucket` samples are cumulative: each `le` line counts all
   * observations less than or equal to that bound, so the `+Inf` bucket
   * equals `_count`. `_sum` carries the total of all observed values.
   */
  getPrometheusMetrics(): string {
    const lines: string[] = [];

    for (const [name, metric] of this.metrics) {
      lines.push(`# HELP ${name} ${metric.help}`);
      lines.push(`# TYPE ${name} ${metric.type}`);

      if (metric.type !== 'histogram') {
        lines.push(`${name} ${metric.value}`);
        continue;
      }

      // Internal storage is per-bucket; accumulate while emitting.
      let cumulative = 0;
      metric.buckets.forEach((upperBound, index) => {
        cumulative += metric.values[index] ?? 0;
        lines.push(`${name}_bucket{le="${upperBound}"} ${cumulative}`);
      });
      cumulative += metric.values[metric.buckets.length] ?? 0;
      lines.push(`${name}_bucket{le="+Inf"} ${cumulative}`);
      lines.push(`${name}_sum ${metric.sum}`);
      lines.push(`${name}_count ${cumulative}`);
    }

    // Every line, including the last, is newline-terminated.
    return lines.map((line) => `${line}\n`).join('');
  }
}
|
|
|
|
/**
 * Lint rule to prevent high-cardinality labels in metrics.
 *
 * Each label is checked in iteration order, key first and then value:
 * - the key must not name a per-request / per-user identifier (request id,
 *   active DID, JWT id, user id, session id, trace id, span id). Matching is
 *   case-insensitive and tolerates an optional `_` or `-` before the `id` /
 *   `did` suffix, so `userId`, `user_id` and `USER-ID` are all rejected;
 * - the value must be at most 50 characters and must not look like a hex
 *   digest (32 or more hex characters, either case).
 *
 * @param labels Label name -> label value pairs about to be attached to a metric.
 * @throws Error on the first offending key or value.
 */
export function validateMetricLabels(labels: Record<string, string>): void {
  // Prometheus label names are conventionally snake_case, so the separator
  // must be optional or `request_id`-style keys would slip through.
  const highCardinalityPatterns = [
    /request[_-]?id/i,
    /active[_-]?did/i,
    /jwt[_-]?id/i,
    /user[_-]?id/i,
    /session[_-]?id/i,
    /trace[_-]?id/i,
    /span[_-]?id/i,
  ];
  const hexDigestPattern = /^[a-f0-9]{32,}$/i;
  const maxValueLength = 50;

  for (const [key, value] of Object.entries(labels)) {
    if (highCardinalityPatterns.some((pattern) => pattern.test(key))) {
      throw new Error(
        `High-cardinality label detected: ${key}. ` +
          `Use logs for request-level data, not metrics. ` +
          `Consider using a hash or removing the label.`
      );
    }

    // Check for high-cardinality values
    if (value.length > maxValueLength || hexDigestPattern.test(value)) {
      throw new Error(
        `High-cardinality value detected for label ${key}: ${value}. ` +
          `Consider using a hash or removing the label.`
      );
    }
  }
}
|
|
|
|
/**
 * Safe metric recording with validation.
 *
 * Validates `labels` (when given) with `validateMetricLabels`, then forwards
 * `value` to the `TelemetryManager` recorder that matches `metricName`.
 * The labels are only validated; they are not passed on to the recorder.
 *
 * How `value` is used depends on the metric:
 * - poll_attempts / poll_failure / error / rate_limit: ignored (counts one event);
 * - poll_success: counts one success and uses `value` as the poll duration;
 * - changes_found / notifications_generated: `value` is the amount to add
 *   and must be a finite number;
 * - api_latency: `value` is the observed latency;
 * - outbox_size / api_throughput: `value` is the new gauge value;
 * - outbox_backpressure_active: active when `value > 0`.
 *
 * @param telemetry Manager that owns the metrics.
 * @param metricName Full metric name, e.g. `starred_projects_error_total`.
 * @param value Numeric payload, interpreted per metric as described above.
 * @param labels Optional labels to lint for high cardinality.
 * @throws Error if a label fails validation or `metricName` is not handled here.
 * @throws RangeError if a count-based counter is given NaN or +/-Infinity.
 */
export function recordMetricWithValidation(
  telemetry: TelemetryManager,
  metricName: string,
  value: number,
  labels?: Record<string, string>
): void {
  if (labels) {
    validateMetricLabels(labels);
  }

  // A non-finite count cannot be applied to a counter: NaN would be silently
  // dropped and Infinity would never finish a per-unit increment loop.
  const finiteCount = (): number => {
    if (!Number.isFinite(value)) {
      throw new RangeError(`Metric ${metricName} requires a finite count, got ${value}`);
    }
    return value;
  };

  // Record metric based on type
  switch (metricName) {
    case 'starred_projects_poll_attempts_total':
      telemetry.recordPollAttempt();
      break;
    case 'starred_projects_poll_success_total':
      telemetry.recordPollSuccess(value);
      break;
    case 'starred_projects_poll_failure_total':
      telemetry.recordPollFailure();
      break;
    case 'starred_projects_changes_found_total':
      telemetry.recordChangesFound(finiteCount());
      break;
    case 'starred_projects_notifications_generated_total':
      telemetry.recordNotificationsGenerated(finiteCount());
      break;
    case 'starred_projects_error_total':
      telemetry.recordError();
      break;
    case 'starred_projects_rate_limit_total':
      telemetry.recordRateLimit();
      break;
    case 'starred_projects_api_latency_seconds':
      telemetry.recordApiLatency(value);
      break;
    case 'starred_projects_outbox_size':
      telemetry.recordOutboxSize(value);
      break;
    case 'starred_projects_outbox_backpressure_active':
      telemetry.recordBackpressureActive(value > 0);
      break;
    case 'starred_projects_api_throughput_rps':
      telemetry.recordApiThroughput(value);
      break;
    default:
      throw new Error(`Unknown metric: ${metricName}`);
  }
}
|
|
|