Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions apps/backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,20 @@ CORS_ORIGIN=http://localhost:3000
# News Provider API (CoinDesk)
# Documentation: https://developers.coindesk.com/documentation/data-api/news_v1_search
COINDESK_API_KEY=your_api_key_here

# ========================
# AI Metrics / GPU Monitoring
# ========================

# Maximum concurrent AI inference requests before throttling (default: 10)
AI_MAX_CONCURRENT_INFERENCES=10

# System RAM usage ratio (0-1) that triggers request throttling (default: 0.90)
AI_RAM_THROTTLE_THRESHOLD=0.90

# GPU VRAM usage ratio (0-1) that triggers request throttling (default: 0.90)
AI_VRAM_THROTTLE_THRESHOLD=0.90

# Resource sampling interval in milliseconds (default: 15000)
AI_METRICS_SAMPLING_MS=15000

147 changes: 147 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.controller.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AiMetricsController } from './ai-metrics.controller';
import { AiMetricsService, AiHealthReport } from './ai-metrics.service';

describe('AiMetricsController', () => {
  let underTest: AiMetricsController;
  let serviceStub: Partial<AiMetricsService>;

  // Canonical healthy report returned by the stubbed service.
  const healthFixture: AiHealthReport = {
    status: 'healthy',
    timestamp: '2026-03-26T09:00:00.000Z',
    uptime: 12345,
    throttling: {
      active: false,
      reason: null,
      currentLoad: 2,
      maxConcurrent: 10,
    },
    resources: {
      totalMemoryBytes: 16e9,
      freeMemoryBytes: 8e9,
      usedMemoryBytes: 8e9,
      memoryUsageRatio: 0.5,
      heapUsedBytes: 100e6,
      heapTotalBytes: 200e6,
      rssBytes: 300e6,
      externalBytes: 10e6,
      gpuAvailable: false,
      vramTotalBytes: null,
      vramUsedBytes: null,
      vramFreeBytes: null,
      vramUsageRatio: null,
    },
    models: {
      totalLoaded: 1,
      loadTimes: { 'sentiment-v2': 1200 },
    },
    counters: {
      totalInferenceRequests: 42,
      totalInferenceErrors: 3,
      throttledRequests: 1,
    },
  };

  // Builds a minimal Express-like response double whose status() chains to json().
  const makeRes = () => {
    const json = jest.fn();
    const status = jest.fn().mockReturnValue({ json });
    return { json, status, res: { status, json } as any };
  };

  beforeEach(async () => {
    serviceStub = {
      getHealthReport: jest.fn().mockReturnValue(healthFixture),
      getPrometheusMetrics: jest
        .fn()
        .mockResolvedValue('# HELP ai_inference_requests_total\n'),
    };

    const moduleRef: TestingModule = await Test.createTestingModule({
      controllers: [AiMetricsController],
      providers: [{ provide: AiMetricsService, useValue: serviceStub }],
    }).compile();

    underTest = moduleRef.get<AiMetricsController>(AiMetricsController);
  });

  it('should be defined', () => {
    expect(underTest).toBeDefined();
  });

  describe('GET /ai/metrics', () => {
    it('should return the health report as JSON', () => {
      const { json, status, res } = makeRes();

      underTest.getAiMetrics(res);

      expect(status).toHaveBeenCalledWith(200);
      expect(json).toHaveBeenCalledWith(healthFixture);
    });

    it('should return 500 on error', () => {
      (serviceStub.getHealthReport as jest.Mock).mockImplementation(() => {
        throw new Error('boom');
      });

      const { status, res } = makeRes();

      underTest.getAiMetrics(res);

      expect(status).toHaveBeenCalledWith(500);
    });
  });

  describe('GET /ai/metrics/prometheus', () => {
    it('should return Prometheus text format', async () => {
      const send = jest.fn();
      const set = jest.fn();
      const res = {
        set,
        send,
        status: jest.fn().mockReturnValue({ json: jest.fn() }),
      } as any;

      await underTest.getPrometheusMetrics(res);

      expect(set).toHaveBeenCalledWith(
        'Content-Type',
        'text/plain; version=0.0.4; charset=utf-8',
      );
      expect(send).toHaveBeenCalledWith(
        expect.stringContaining('ai_inference_requests_total'),
      );
    });
  });

  describe('GET /ai/metrics/health', () => {
    it('should return 200 when healthy', () => {
      const { json, status, res } = makeRes();

      underTest.getAiHealth(res);

      expect(status).toHaveBeenCalledWith(200);
      expect(json).toHaveBeenCalledWith(
        expect.objectContaining({ status: 'healthy' }),
      );
    });

    it('should return 503 when critical', () => {
      (serviceStub.getHealthReport as jest.Mock).mockReturnValue({
        ...healthFixture,
        status: 'critical' as const,
        throttling: { ...healthFixture.throttling, active: true },
      });

      const { status, res } = makeRes();

      underTest.getAiHealth(res);

      expect(status).toHaveBeenCalledWith(503);
    });
  });
});
128 changes: 128 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import {
Controller,
Get,
UseGuards,
Res,
Logger,
HttpStatus,
} from '@nestjs/common';
import type { Response } from 'express';
import { AiMetricsService } from './ai-metrics.service';
import { IpAllowlistGuard } from '../metrics/ip-allowlist.guard';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiProduces,
} from '@nestjs/swagger';

/**
 * Controller that exposes the AI-layer health & performance metrics.
 *
 * Endpoints:
 *   GET /ai/metrics            — full JSON health report (resource usage, throttling, model stats)
 *   GET /ai/metrics/prometheus — Prometheus-format text for scraping
 *   GET /ai/metrics/health     — lightweight liveness / readiness check
 *
 * All endpoints are protected by the shared IpAllowlistGuard.
 */
@ApiTags('ai-metrics')
@Controller('ai/metrics')
@UseGuards(IpAllowlistGuard)
export class AiMetricsController {
  private readonly logger = new Logger(AiMetricsController.name);

  constructor(private readonly aiMetricsService: AiMetricsService) {}

  /**
   * GET /ai/metrics
   * Returns a comprehensive JSON health report including:
   *  - System status (healthy / degraded / critical)
   *  - Resource usage (RAM, heap, VRAM)
   *  - Throttling state & reason
   *  - Model load times
   *  - Request & error counters
   */
  @Get()
  @ApiOperation({
    summary: 'Get AI-layer health & performance metrics',
    description:
      'Returns a comprehensive JSON report of the AI subsystem health, ' +
      'including resource utilisation, throttling state, loaded models, and counters.',
  })
  @ApiResponse({
    status: 200,
    description: 'AI health report in JSON',
  })
  @ApiResponse({
    status: 403,
    description: 'Forbidden — IP not in allowlist and no valid JWT',
  })
  getAiMetrics(@Res() response: Response): void {
    try {
      const report = this.aiMetricsService.getHealthReport();
      response.status(HttpStatus.OK).json(report);
    } catch (error) {
      this.logger.error('Error building AI health report:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve AI metrics' });
    }
  }

  /**
   * GET /ai/metrics/prometheus
   * Returns AI-specific metrics in Prometheus text format.
   */
  @Get('prometheus')
  @ApiOperation({
    summary: 'Get AI metrics in Prometheus format',
    description:
      'Returns AI inference, model-load, and resource metrics in Prometheus text format for scraping.',
  })
  @ApiProduces('text/plain')
  @ApiResponse({
    status: 200,
    description: 'Prometheus-format metrics',
  })
  async getPrometheusMetrics(@Res() response: Response): Promise<void> {
    try {
      const metrics = await this.aiMetricsService.getPrometheusMetrics();
      // Prometheus scrapers expect the 0.0.4 text exposition content type.
      response.set('Content-Type', 'text/plain; version=0.0.4; charset=utf-8');
      response.send(metrics);
    } catch (error) {
      this.logger.error('Error getting Prometheus AI metrics:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve Prometheus metrics' });
    }
  }

  /**
   * GET /ai/metrics/health
   * Lightweight liveness/readiness check for the AI subsystem.
   * Returns 200 when healthy/degraded, 503 when the system should be throttled,
   * and 500 if the report itself cannot be produced.
   */
  @Get('health')
  @ApiOperation({
    summary: 'AI subsystem health check',
    description:
      'Returns 200 when the AI layer is operational, 503 when it is under resource pressure and throttling.',
  })
  @ApiResponse({ status: 200, description: 'AI layer is healthy or degraded' })
  @ApiResponse({
    status: 503,
    description: 'AI layer is in a critical state and throttling requests',
  })
  getAiHealth(@Res() response: Response): void {
    // Wrap in try/catch for consistency with the other endpoints: because
    // @Res() opts out of Nest's response handling, an uncaught throw here
    // would leave the request without any response at all.
    try {
      const report = this.aiMetricsService.getHealthReport();
      const statusCode =
        report.status === 'critical'
          ? HttpStatus.SERVICE_UNAVAILABLE
          : HttpStatus.OK;
      response.status(statusCode).json({
        status: report.status,
        timestamp: report.timestamp,
        uptime: report.uptime,
        throttling: report.throttling,
      });
    } catch (error) {
      this.logger.error('Error building AI health report:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve AI health status' });
    }
  }
}
71 changes: 71 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.interceptor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import {
Injectable,
NestInterceptor,
ExecutionContext,
CallHandler,
Logger,
} from '@nestjs/common';
import { Observable } from 'rxjs';
import { tap } from 'rxjs/operators';
import type { Request } from 'express';
import { AiMetricsService } from './ai-metrics.service';

/**
 * Interceptor that automatically instruments AI-related routes with
 * inference latency tracking.
 *
 * Apply it to controllers or individual routes:
 *   @UseInterceptors(AiMetricsInterceptor)
 *
 * The interceptor reads the `x-ai-model` header (or falls back to the
 * route path) to identify the model being used, then records timing
 * via the AiMetricsService.
 */
@Injectable()
export class AiMetricsInterceptor implements NestInterceptor {
  private readonly logger = new Logger(AiMetricsInterceptor.name);

  constructor(private readonly aiMetrics: AiMetricsService) {}

  intercept(context: ExecutionContext, next: CallHandler): Observable<unknown> {
    const request = context.switchToHttp().getRequest<Request>();

    // Node types incoming headers as string | string[] | undefined; a client
    // repeating the header yields an array, so normalise to the first value
    // instead of blindly casting to `string | undefined`.
    const rawHeader = request.headers['x-ai-model'];
    const headerModel = Array.isArray(rawHeader) ? rawHeader[0] : rawHeader;

    // `||` (not `??`) is deliberate: an empty header should also fall back
    // to the route-derived name.
    const modelName = headerModel || this.extractModelFromRoute(request.path);

    const tracker = this.aiMetrics.startInference(modelName);

    return next.handle().pipe(
      tap({
        next: () => {
          tracker.end('success');
        },
        error: (error: unknown) => {
          const errorType =
            error instanceof Error ? error.constructor.name : 'UnknownError';
          tracker.end('error', errorType);
        },
      }),
    );
  }

  /**
   * Derive a model identifier from the route path.
   * e.g. /analyze → "sentiment", /retrain → "retraining"
   */
  private extractModelFromRoute(path: string): string {
    // Strip leading/trailing slashes and lowercase before keyword matching.
    const cleanPath = (path || '').replace(/^\/+|\/+$/g, '').toLowerCase();

    if (cleanPath.includes('sentiment') || cleanPath.includes('analyze')) {
      return 'sentiment';
    }
    if (cleanPath.includes('retrain')) {
      return 'retraining';
    }
    if (cleanPath.includes('predict') || cleanPath.includes('forecast')) {
      return 'forecasting';
    }

    return cleanPath || 'unknown';
  }
}
Loading
Loading