You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
369 lines
13 KiB
369 lines
13 KiB
/**
 * Telemetry management with cardinality budgets
 */
|
|
|
|
import { TelemetryMetrics, TelemetryLogs } from './types';
|
|
import { hashDid, redactPii } from './validation';
|
|
|
|
/** Log severities, listed from most to least verbose. */
type LogLevel = 'DEBUG' | 'INFO' | 'WARN' | 'ERROR';

/** Fields shared by every in-memory metric. */
interface MetricBase {
  name: string;
  help: string;
}

/** Running total that is only ever added to by this module. */
interface CounterMetric extends MetricBase {
  type: 'counter';
  value: number;
  inc: (amount?: number) => void;
}

/** Point-in-time value that is overwritten on every update. */
interface GaugeMetric extends MetricBase {
  type: 'gauge';
  value: number;
  set: (value: number) => void;
}

/** Distribution of observed values over a fixed set of upper bounds. */
interface HistogramMetric extends MetricBase {
  type: 'histogram';
  /** Upper bounds of the finite buckets, expected in ascending order. */
  buckets: number[];
  /** Per-bucket (NOT cumulative) counts; the final slot is the overflow (+Inf) bucket. */
  values: number[];
  /** Sum of every recorded observation. */
  sum: number;
  observe: (value: number) => void;
}

/** Discriminated on `type`, so callers narrow with a plain comparison instead of casts. */
type Metric = CounterMetric | GaugeMetric | HistogramMetric;

/**
 * In-memory (mock) metric registry plus level-filtered console logging.
 *
 * Metrics are a fixed, label-free set registered at construction time.
 * Request-level detail goes through the `log*` methods instead, which pass
 * their payload through `redactPii` before printing.
 */
export class TelemetryManager {
  /** Numeric rank per level; a message is emitted when its rank is >= the configured rank. */
  private static readonly LEVEL_RANK: Record<LogLevel, number> = {
    DEBUG: 0,
    INFO: 1,
    WARN: 2,
    ERROR: 3,
  };

  /** Registered metrics keyed by name; insertion order is the export order. */
  private metrics: Map<string, Metric> = new Map();

  private logLevel: LogLevel;

  /**
   * @param logLevel Minimum severity that the `log*` methods will emit.
   */
  constructor(logLevel: LogLevel = 'INFO') {
    this.logLevel = logLevel;
    this.registerMetrics();
  }

  /** Registers the fixed metric set: counters first, then histograms, then gauges. */
  private registerMetrics(): void {
    const register = (metric: Metric): void => {
      this.metrics.set(metric.name, metric);
    };

    // Counter metrics (low cardinality)
    register(this.createCounter('starred_projects_poll_attempts_total', 'Total number of polling attempts'));
    register(this.createCounter('starred_projects_poll_success_total', 'Total number of successful polls'));
    register(this.createCounter('starred_projects_poll_failure_total', 'Total number of failed polls'));
    register(this.createCounter('starred_projects_changes_found_total', 'Total number of changes found'));
    register(
      this.createCounter('starred_projects_notifications_generated_total', 'Total notifications generated'),
    );
    register(this.createCounter('starred_projects_error_total', 'Total number of errors'));
    register(this.createCounter('starred_projects_rate_limit_total', 'Total number of rate limit hits'));

    // Histogram metrics (low cardinality)
    register(
      this.createHistogram(
        'starred_projects_poll_duration_seconds',
        'Polling duration in seconds',
        [0.1, 0.5, 1, 2, 5, 10, 30],
      ),
    );
    register(
      this.createHistogram(
        'starred_projects_api_latency_seconds',
        'API latency in seconds',
        [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
      ),
    );

    // Gauge metrics (low cardinality)
    register(this.createGauge('starred_projects_outbox_size', 'Current number of undelivered notifications'));
    register(this.createGauge('starred_projects_outbox_backpressure_active', 'Backpressure active (0/1)'));
    register(this.createGauge('starred_projects_api_throughput_rps', 'API throughput in requests per second'));
  }

  /** Builds a mock counter starting at 0; `inc` adds `amount` (default 1). */
  private createCounter(name: string, help: string): CounterMetric {
    const counter: CounterMetric = {
      name,
      help,
      type: 'counter',
      value: 0,
      inc: (amount = 1): void => {
        counter.value += amount;
      },
    };
    return counter;
  }

  /**
   * Builds a mock histogram with one slot per bound plus a trailing overflow slot.
   *
   * @param buckets Upper bounds in ascending order; an observation lands in the
   *   first bucket whose bound is >= the value.
   */
  private createHistogram(name: string, help: string, buckets: number[]): HistogramMetric {
    const histogram: HistogramMetric = {
      name,
      help,
      type: 'histogram',
      buckets,
      values: new Array<number>(buckets.length + 1).fill(0),
      sum: 0,
      observe: (value: number): void => {
        // NaN is not a measurement: it compares false against every bound and
        // would turn the running sum into NaN, so it is dropped.
        if (Number.isNaN(value)) return;
        const firstFit = histogram.buckets.findIndex((upperBound) => value <= upperBound);
        const slot = firstFit === -1 ? histogram.buckets.length : firstFit; // -1 => +Inf bucket
        histogram.values[slot] += 1;
        histogram.sum += value;
      },
    };
    return histogram;
  }

  /** Builds a mock gauge starting at 0; `set` overwrites the stored value. */
  private createGauge(name: string, help: string): GaugeMetric {
    const gauge: GaugeMetric = {
      name,
      help,
      type: 'gauge',
      value: 0,
      set: (value: number): void => {
        gauge.value = value;
      },
    };
    return gauge;
  }

  /** Adds `amount` to the named counter; a missing or non-counter metric is a no-op. */
  private incrementCounter(name: string, amount = 1): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'counter') {
      metric.inc(amount);
    }
  }

  /** Records `value` in the named histogram; a missing or non-histogram metric is a no-op. */
  private observeHistogram(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'histogram') {
      metric.observe(value);
    }
  }

  /** Overwrites the named gauge; a missing or non-gauge metric is a no-op. */
  private setGauge(name: string, value: number): void {
    const metric = this.metrics.get(name);
    if (metric !== undefined && metric.type === 'gauge') {
      metric.set(value);
    }
  }

  /**
   * Converts a caller-supplied item count into a counter increment.
   *
   * Non-finite and non-positive counts contribute nothing. Fractional counts
   * are rounded up, which is what the earlier "call inc() once per unit" loop
   * produced; unlike that loop this is O(1) and cannot spin on `Infinity`.
   */
  private static toIncrement(count: number): number {
    if (!Number.isFinite(count) || count <= 0) return 0;
    return Math.ceil(count);
  }

  // Low-cardinality metric recording

  /** Counts one polling attempt. */
  recordPollAttempt(): void {
    this.incrementCounter('starred_projects_poll_attempts_total');
  }

  /**
   * Counts one successful poll and records how long it took.
   *
   * @param durationSeconds Poll duration, observed into the poll-duration histogram.
   */
  recordPollSuccess(durationSeconds: number): void {
    this.incrementCounter('starred_projects_poll_success_total');
    this.observeHistogram('starred_projects_poll_duration_seconds', durationSeconds);
  }

  /** Counts one failed poll. */
  recordPollFailure(): void {
    this.incrementCounter('starred_projects_poll_failure_total');
  }

  /**
   * Adds `count` to the changes-found counter.
   *
   * @param count Number of changes; non-finite or non-positive values are ignored.
   */
  recordChangesFound(count: number): void {
    this.incrementCounter('starred_projects_changes_found_total', TelemetryManager.toIncrement(count));
  }

  /**
   * Adds `count` to the notifications-generated counter.
   *
   * @param count Number of notifications; non-finite or non-positive values are ignored.
   */
  recordNotificationsGenerated(count: number): void {
    this.incrementCounter(
      'starred_projects_notifications_generated_total',
      TelemetryManager.toIncrement(count),
    );
  }

  /** Counts one error. */
  recordError(): void {
    this.incrementCounter('starred_projects_error_total');
  }

  /** Counts one rate-limit hit. */
  recordRateLimit(): void {
    this.incrementCounter('starred_projects_rate_limit_total');
  }

  /** Observes one API call latency, in seconds. */
  recordApiLatency(latencySeconds: number): void {
    this.observeHistogram('starred_projects_api_latency_seconds', latencySeconds);
  }

  /** Sets the outbox-size gauge. */
  recordOutboxSize(size: number): void {
    this.setGauge('starred_projects_outbox_size', size);
  }

  /** Sets the backpressure gauge to 1 when `active`, otherwise 0. */
  recordBackpressureActive(active: boolean): void {
    this.setGauge('starred_projects_outbox_backpressure_active', active ? 1 : 0);
  }

  /** Sets the API-throughput gauge, in requests per second. */
  recordApiThroughput(rps: number): void {
    this.setGauge('starred_projects_api_throughput_rps', rps);
  }

  // High-cardinality data (logs only, not metrics)

  /**
   * Logs a polling event at INFO level.
   *
   * `activeDid` is replaced by `hashDid(activeDid)` and the whole event is
   * passed through `redactPii` before it reaches the console.
   */
  logPollingEvent(event: TelemetryLogs): void {
    if (!this.shouldLog('INFO')) return;

    const redactedEvent = redactPii({
      ...event,
      activeDid: hashDid(event.activeDid), // Hash for privacy
    });

    // eslint-disable-next-line no-console
    console.log('Polling event:', redactedEvent);
  }

  /**
   * Logs an error (message and stack) at ERROR level.
   *
   * @param context Optional extra data; passed through `redactPii` before printing.
   */
  logError(error: Error, context?: Record<string, unknown>): void {
    if (!this.shouldLog('ERROR')) return;

    const redactedContext = context ? redactPii(context) : undefined;
    // eslint-disable-next-line no-console
    console.error('Polling error:', {
      message: error.message,
      stack: error.stack,
      context: redactedContext,
    });
  }

  /**
   * Logs a warning at WARN level.
   *
   * @param context Optional extra data; passed through `redactPii` before printing.
   */
  logWarning(message: string, context?: Record<string, unknown>): void {
    if (!this.shouldLog('WARN')) return;

    const redactedContext = context ? redactPii(context) : undefined;
    // eslint-disable-next-line no-console
    console.warn('Polling warning:', { message, context: redactedContext });
  }

  /**
   * Logs a debug message at DEBUG level.
   *
   * @param context Optional extra data; passed through `redactPii` before printing.
   */
  logDebug(message: string, context?: Record<string, unknown>): void {
    if (!this.shouldLog('DEBUG')) return;

    const redactedContext = context ? redactPii(context) : undefined;
    // eslint-disable-next-line no-console
    console.debug('Polling debug:', { message, context: redactedContext });
  }

  /** True when `level` is at least as severe as the configured log level. */
  private shouldLog(level: LogLevel): boolean {
    return TelemetryManager.LEVEL_RANK[level] >= TelemetryManager.LEVEL_RANK[this.logLevel];
  }

  /**
   * Get all metrics for export.
   *
   * @returns A name -> current value map of every counter and gauge.
   *   Histograms carry no single value and are not included.
   */
  getMetrics(): TelemetryMetrics {
    const snapshot: Record<string, unknown> = {};
    for (const [name, metric] of this.metrics) {
      if (metric.type !== 'histogram') {
        snapshot[name] = metric.value;
      }
    }
    // TelemetryMetrics is declared in another module; the registered names are
    // presumed to match its fields, hence the double assertion — TODO confirm.
    return snapshot as unknown as TelemetryMetrics;
  }

  /**
   * Get metrics in Prometheus text exposition format.
   *
   * Histogram `_bucket{le="..."}` samples are cumulative (each one counts
   * every observation at or below its bound), followed by `_sum` and `_count`.
   *
   * @returns One `# HELP` / `# TYPE` header pair and the samples for each
   *   metric, in registration order, every line terminated by `\n`.
   */
  getPrometheusMetrics(): string {
    const lines: string[] = [];

    for (const [name, metric] of this.metrics) {
      lines.push(`# HELP ${name} ${metric.help}`);
      lines.push(`# TYPE ${name} ${metric.type}`);

      if (metric.type === 'histogram') {
        // Storage is per-bucket; accumulate while emitting so each `le` sample
        // includes all smaller buckets.
        let cumulative = 0;
        for (let i = 0; i < metric.buckets.length; i++) {
          cumulative += metric.values[i];
          lines.push(`${name}_bucket{le="${metric.buckets[i]}"} ${cumulative}`);
        }
        cumulative += metric.values[metric.buckets.length];
        lines.push(`${name}_bucket{le="+Inf"} ${cumulative}`);
        lines.push(`${name}_sum ${metric.sum}`);
        lines.push(`${name}_count ${cumulative}`);
      } else {
        lines.push(`${name} ${metric.value}`);
      }
    }

    return lines.map((line) => `${line}\n`).join('');
  }
}
|
|
|
|
/**
 * Label-key fragments that identify per-request / per-user identifiers.
 * The optional underscore covers both camelCase (`userId`) and the snake_case
 * spelling conventional for Prometheus label names (`user_id`).
 */
const HIGH_CARDINALITY_KEY_PATTERNS: readonly RegExp[] = [
  /request_?id/i,
  /active_?did/i,
  /jwt_?id/i,
  /user_?id/i,
  /session_?id/i,
  /trace_?id/i,
  /span_?id/i,
];

/** A value made only of 32 or more hex digits (either case) looks like a digest or opaque id. */
const HEX_DIGEST_PATTERN = /^[a-f0-9]{32,}$/i;

/** Label values longer than this are treated as high-cardinality. */
const MAX_LABEL_VALUE_LENGTH = 50;

/**
 * Lint rule to prevent high-cardinality labels in metrics.
 *
 * Each label is checked twice: its key must not contain an identifier-like
 * fragment (request/user/session/trace/span/jwt id, active DID), and its value
 * must be neither longer than 50 characters nor a run of 32+ hex digits.
 *
 * @param labels Label name -> label value pairs about to be attached to a metric.
 * @throws Error on the first offending key or value, in `Object.entries` order.
 */
export function validateMetricLabels(labels: Record<string, string>): void {
  for (const [key, value] of Object.entries(labels)) {
    if (HIGH_CARDINALITY_KEY_PATTERNS.some((pattern) => pattern.test(key))) {
      throw new Error(
        `High-cardinality label detected: ${key}. ` +
          `Use logs for request-level data, not metrics. ` +
          `Consider using a hash or removing the label.`,
      );
    }

    // Check for high-cardinality values
    if (value.length > MAX_LABEL_VALUE_LENGTH || HEX_DIGEST_PATTERN.test(value)) {
      throw new Error(
        `High-cardinality value detected for label ${key}: ${value}. ` +
          `Consider using a hash or removing the label.`,
      );
    }
  }
}
|
|
|
|
/** Adapter that forwards a generic `(telemetry, value)` pair to one typed recorder. */
type MetricRecorder = (telemetry: TelemetryManager, value: number) => void;

/**
 * Metric name -> recorder lookup.
 *
 * Recorders for unit counters ignore `value`. Note that
 * `starred_projects_poll_duration_seconds` has no entry and is therefore
 * reported as an unknown metric; durations are only recorded through the
 * `starred_projects_poll_success_total` entry, which forwards `value` to
 * `recordPollSuccess`.
 */
const METRIC_RECORDERS: ReadonlyMap<string, MetricRecorder> = new Map<string, MetricRecorder>([
  ['starred_projects_poll_attempts_total', (telemetry) => telemetry.recordPollAttempt()],
  ['starred_projects_poll_success_total', (telemetry, value) => telemetry.recordPollSuccess(value)],
  ['starred_projects_poll_failure_total', (telemetry) => telemetry.recordPollFailure()],
  ['starred_projects_changes_found_total', (telemetry, value) => telemetry.recordChangesFound(value)],
  [
    'starred_projects_notifications_generated_total',
    (telemetry, value) => telemetry.recordNotificationsGenerated(value),
  ],
  ['starred_projects_error_total', (telemetry) => telemetry.recordError()],
  ['starred_projects_rate_limit_total', (telemetry) => telemetry.recordRateLimit()],
  ['starred_projects_api_latency_seconds', (telemetry, value) => telemetry.recordApiLatency(value)],
  ['starred_projects_outbox_size', (telemetry, value) => telemetry.recordOutboxSize(value)],
  [
    'starred_projects_outbox_backpressure_active',
    (telemetry, value) => telemetry.recordBackpressureActive(value > 0),
  ],
  ['starred_projects_api_throughput_rps', (telemetry, value) => telemetry.recordApiThroughput(value)],
]);

/**
 * Safe metric recording with validation.
 *
 * Labels, when supplied, are checked with `validateMetricLabels` before
 * anything is recorded; they are used for validation only and are not passed
 * on to the recorder.
 *
 * @param telemetry Manager that receives the recording call.
 * @param metricName Name of the metric to update.
 * @param value Amount, duration or gauge value, depending on the metric; for
 *   the backpressure gauge any value greater than 0 means "active".
 * @param labels Optional labels to validate.
 * @throws Error when a label is rejected by `validateMetricLabels`, or with
 *   `Unknown metric: <name>` when `metricName` has no recorder.
 */
export function recordMetricWithValidation(
  telemetry: TelemetryManager,
  metricName: string,
  value: number,
  labels?: Record<string, string>,
): void {
  if (labels) {
    validateMetricLabels(labels);
  }

  // Record metric based on type
  const recorder = METRIC_RECORDERS.get(metricName);
  if (recorder === undefined) {
    throw new Error(`Unknown metric: ${metricName}`);
  }
  recorder(telemetry, value);
}
|
|
|