Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions apps/backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,20 @@ CORS_ORIGIN=http://localhost:3000
# News Provider API (CoinDesk)
# Documentation: https://developers.coindesk.com/documentation/data-api/news_v1_search
COINDESK_API_KEY=your_api_key_here

# ========================
# AI Metrics / GPU Monitoring
# ========================

# Maximum concurrent AI inference requests before throttling (default: 10)
AI_MAX_CONCURRENT_INFERENCES=10

# System RAM usage ratio (0-1) that triggers request throttling (default: 0.90)
AI_RAM_THROTTLE_THRESHOLD=0.90

# GPU VRAM usage ratio (0-1) that triggers request throttling (default: 0.90)
AI_VRAM_THROTTLE_THRESHOLD=0.90

# Resource sampling interval in milliseconds (default: 15000)
AI_METRICS_SAMPLING_MS=15000

147 changes: 147 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.controller.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AiMetricsController } from './ai-metrics.controller';
import { AiMetricsService, AiHealthReport } from './ai-metrics.service';

describe('AiMetricsController', () => {
  let underTest: AiMetricsController;
  let serviceStub: Partial<AiMetricsService>;

  // Canonical healthy report returned by the stubbed service.
  const healthFixture: AiHealthReport = {
    status: 'healthy',
    timestamp: '2026-03-26T09:00:00.000Z',
    uptime: 12345,
    throttling: {
      active: false,
      reason: null,
      currentLoad: 2,
      maxConcurrent: 10,
    },
    resources: {
      totalMemoryBytes: 16e9,
      freeMemoryBytes: 8e9,
      usedMemoryBytes: 8e9,
      memoryUsageRatio: 0.5,
      heapUsedBytes: 100e6,
      heapTotalBytes: 200e6,
      rssBytes: 300e6,
      externalBytes: 10e6,
      gpuAvailable: false,
      vramTotalBytes: null,
      vramUsedBytes: null,
      vramFreeBytes: null,
      vramUsageRatio: null,
    },
    models: {
      totalLoaded: 1,
      loadTimes: { 'sentiment-v2': 1200 },
    },
    counters: {
      totalInferenceRequests: 42,
      totalInferenceErrors: 3,
      throttledRequests: 1,
    },
  };

  // Builds a minimal Express-like response double whose status() chains to json().
  const makeRes = () => {
    const json = jest.fn();
    const status = jest.fn().mockReturnValue({ json });
    return { json, status, res: { status, json } as any };
  };

  beforeEach(async () => {
    serviceStub = {
      getHealthReport: jest.fn().mockReturnValue(healthFixture),
      getPrometheusMetrics: jest
        .fn()
        .mockResolvedValue('# HELP ai_inference_requests_total\n'),
    };

    const moduleRef: TestingModule = await Test.createTestingModule({
      controllers: [AiMetricsController],
      providers: [{ provide: AiMetricsService, useValue: serviceStub }],
    }).compile();

    underTest = moduleRef.get<AiMetricsController>(AiMetricsController);
  });

  it('should be defined', () => {
    expect(underTest).toBeDefined();
  });

  describe('GET /ai/metrics', () => {
    it('should return the health report as JSON', () => {
      const { json, status, res } = makeRes();

      underTest.getAiMetrics(res);

      expect(status).toHaveBeenCalledWith(200);
      expect(json).toHaveBeenCalledWith(healthFixture);
    });

    it('should return 500 on error', () => {
      (serviceStub.getHealthReport as jest.Mock).mockImplementation(() => {
        throw new Error('boom');
      });

      const { status, res } = makeRes();

      underTest.getAiMetrics(res);

      expect(status).toHaveBeenCalledWith(500);
    });
  });

  describe('GET /ai/metrics/prometheus', () => {
    it('should return Prometheus text format', async () => {
      const send = jest.fn();
      const set = jest.fn();
      const res = {
        set,
        send,
        status: jest.fn().mockReturnValue({ json: jest.fn() }),
      } as any;

      await underTest.getPrometheusMetrics(res);

      expect(set).toHaveBeenCalledWith(
        'Content-Type',
        'text/plain; version=0.0.4; charset=utf-8',
      );
      expect(send).toHaveBeenCalledWith(
        expect.stringContaining('ai_inference_requests_total'),
      );
    });
  });

  describe('GET /ai/metrics/health', () => {
    it('should return 200 when healthy', () => {
      const { json, status, res } = makeRes();

      underTest.getAiHealth(res);

      expect(status).toHaveBeenCalledWith(200);
      expect(json).toHaveBeenCalledWith(
        expect.objectContaining({ status: 'healthy' }),
      );
    });

    it('should return 503 when critical', () => {
      (serviceStub.getHealthReport as jest.Mock).mockReturnValue({
        ...healthFixture,
        status: 'critical' as const,
        throttling: { ...healthFixture.throttling, active: true },
      });

      const { status, res } = makeRes();

      underTest.getAiHealth(res);

      expect(status).toHaveBeenCalledWith(503);
    });
  });
});
128 changes: 128 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import {
Controller,
Get,
UseGuards,
Res,
Logger,
HttpStatus,
} from '@nestjs/common';
import type { Response } from 'express';
import { AiMetricsService } from './ai-metrics.service';
import { IpAllowlistGuard } from '../metrics/ip-allowlist.guard';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiProduces,
} from '@nestjs/swagger';

/**
 * Controller that exposes the AI-layer health & performance metrics.
 *
 * Endpoints:
 *   GET /ai/metrics            — full JSON health report (resource usage, throttling, model stats)
 *   GET /ai/metrics/prometheus — Prometheus-format text for scraping
 *   GET /ai/metrics/health     — lightweight liveness / readiness check
 *
 * All endpoints are protected by the shared IpAllowlistGuard.
 */
@ApiTags('ai-metrics')
@Controller('ai/metrics')
@UseGuards(IpAllowlistGuard)
export class AiMetricsController {
  private readonly logger = new Logger(AiMetricsController.name);

  constructor(private readonly aiMetricsService: AiMetricsService) {}

  /**
   * GET /ai/metrics
   * Returns a comprehensive JSON health report including:
   *  - System status (healthy / degraded / critical)
   *  - Resource usage (RAM, heap, VRAM)
   *  - Throttling state & reason
   *  - Model load times
   *  - Request & error counters
   */
  @Get()
  @ApiOperation({
    summary: 'Get AI-layer health & performance metrics',
    description:
      'Returns a comprehensive JSON report of the AI subsystem health, ' +
      'including resource utilisation, throttling state, loaded models, and counters.',
  })
  @ApiResponse({
    status: 200,
    description: 'AI health report in JSON',
  })
  @ApiResponse({
    status: 403,
    description: 'Forbidden — IP not in allowlist and no valid JWT',
  })
  getAiMetrics(@Res() response: Response): void {
    try {
      const report = this.aiMetricsService.getHealthReport();
      response.status(HttpStatus.OK).json(report);
    } catch (error) {
      this.logger.error('Error building AI health report:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve AI metrics' });
    }
  }

  /**
   * GET /ai/metrics/prometheus
   * Returns AI-specific metrics in Prometheus text format.
   */
  @Get('prometheus')
  @ApiOperation({
    summary: 'Get AI metrics in Prometheus format',
    description:
      'Returns AI inference, model-load, and resource metrics in Prometheus text format for scraping.',
  })
  @ApiProduces('text/plain')
  @ApiResponse({
    status: 200,
    description: 'Prometheus-format metrics',
  })
  async getPrometheusMetrics(@Res() response: Response): Promise<void> {
    try {
      const metrics = await this.aiMetricsService.getPrometheusMetrics();
      // Prometheus scrapers expect the 0.0.4 text exposition content type.
      response.set('Content-Type', 'text/plain; version=0.0.4; charset=utf-8');
      response.send(metrics);
    } catch (error) {
      this.logger.error('Error getting Prometheus AI metrics:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve Prometheus metrics' });
    }
  }

  /**
   * GET /ai/metrics/health
   * Lightweight liveness/readiness check for the AI subsystem.
   * Returns 200 when healthy/degraded, 503 when the system should be throttled,
   * and 500 if the report itself cannot be produced.
   */
  @Get('health')
  @ApiOperation({
    summary: 'AI subsystem health check',
    description:
      'Returns 200 when the AI layer is operational, 503 when it is under resource pressure and throttling.',
  })
  @ApiResponse({ status: 200, description: 'AI layer is healthy or degraded' })
  @ApiResponse({
    status: 503,
    description: 'AI layer is in a critical state and throttling requests',
  })
  getAiHealth(@Res() response: Response): void {
    // Wrap in try/catch for consistency with the other endpoints: because
    // @Res() opts out of Nest's response handling, an uncaught throw here
    // would leave the request without any response at all.
    try {
      const report = this.aiMetricsService.getHealthReport();
      const statusCode =
        report.status === 'critical'
          ? HttpStatus.SERVICE_UNAVAILABLE
          : HttpStatus.OK;
      response.status(statusCode).json({
        status: report.status,
        timestamp: report.timestamp,
        uptime: report.uptime,
        throttling: report.throttling,
      });
    } catch (error) {
      this.logger.error('Error building AI health report:', error);
      response
        .status(HttpStatus.INTERNAL_SERVER_ERROR)
        .json({ error: 'Failed to retrieve AI health status' });
    }
  }
}
71 changes: 71 additions & 0 deletions apps/backend/src/ai-metrics/ai-metrics.interceptor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import {
Injectable,
NestInterceptor,
ExecutionContext,
CallHandler,
Logger,
} from '@nestjs/common';
import { Observable } from 'rxjs';
import { tap } from 'rxjs/operators';
import type { Request } from 'express';
import { AiMetricsService } from './ai-metrics.service';

/**
 * Interceptor that automatically instruments AI-related routes with
 * inference latency tracking.
 *
 * Apply it to controllers or individual routes:
 *   @UseInterceptors(AiMetricsInterceptor)
 *
 * The interceptor reads the `x-ai-model` header (or falls back to the
 * route path) to identify the model being used, then records timing
 * via the AiMetricsService.
 */
@Injectable()
export class AiMetricsInterceptor implements NestInterceptor {
  private readonly logger = new Logger(AiMetricsInterceptor.name);

  constructor(private readonly aiMetrics: AiMetricsService) {}

  intercept(context: ExecutionContext, next: CallHandler): Observable<unknown> {
    const request = context.switchToHttp().getRequest<Request>();

    // Node types incoming headers as string | string[] | undefined; a client
    // repeating the header yields an array, so normalise to the first value
    // instead of blindly casting to `string | undefined`.
    const rawHeader = request.headers['x-ai-model'];
    const headerModel = Array.isArray(rawHeader) ? rawHeader[0] : rawHeader;

    // `||` (not `??`) is deliberate: an empty header should also fall back
    // to the route-derived name.
    const modelName = headerModel || this.extractModelFromRoute(request.path);

    const tracker = this.aiMetrics.startInference(modelName);

    return next.handle().pipe(
      tap({
        next: () => {
          tracker.end('success');
        },
        error: (error: unknown) => {
          const errorType =
            error instanceof Error ? error.constructor.name : 'UnknownError';
          tracker.end('error', errorType);
        },
      }),
    );
  }

  /**
   * Derive a model identifier from the route path.
   * e.g. /analyze → "sentiment", /retrain → "retraining"
   */
  private extractModelFromRoute(path: string): string {
    // Strip leading/trailing slashes and lowercase before keyword matching.
    const cleanPath = (path || '').replace(/^\/+|\/+$/g, '').toLowerCase();

    if (cleanPath.includes('sentiment') || cleanPath.includes('analyze')) {
      return 'sentiment';
    }
    if (cleanPath.includes('retrain')) {
      return 'retraining';
    }
    if (cleanPath.includes('predict') || cleanPath.includes('forecast')) {
      return 'forecasting';
    }

    return cleanPath || 'unknown';
  }
}
Loading
Loading