diff --git a/app/.env.example b/app/.env.example index e574ee6..fac70fc 100644 --- a/app/.env.example +++ b/app/.env.example @@ -1,4 +1,6 @@ DATABASE_URL=XXXXXXXXXXX +# SSL mode: 'prefer' (dev - tries SSL, falls back), 'require' (prod - AWS RDS), 'disable' (local dev) +DB_SSL_MODE=prefer NEXT_PUBLIC_MICROSERVICE_URL=XXXXXXXXXXX AWS_REGION=XXXXXXXXXXX AWS_ACCESS_KEY_ID=XXXXXXXXXXX diff --git a/app/scripts/migrate.js b/app/scripts/migrate.js index 9341ba0..3841bc7 100644 --- a/app/scripts/migrate.js +++ b/app/scripts/migrate.js @@ -7,16 +7,26 @@ if (!process.env.DATABASE_URL) { throw new Error("DATABASE_URL is not set"); } -// SSL configuration for AWS RDS -const sslConfig = { - // AWS RDS requires SSL but uses self-signed certificates - // This keeps encryption enabled while accepting AWS certificates - rejectUnauthorized: false, -}; +// SSL configuration - only use for production/AWS RDS +// Development: set DB_SSL_MODE=disable for local PostgreSQL without SSL +// Production: set DB_SSL_MODE=require for AWS RDS +const sslMode = process.env.DB_SSL_MODE || "prefer"; + +let sslConfig = false; +if (sslMode === "require") { + sslConfig = { + rejectUnauthorized: false, + }; +} else if (sslMode === "disable") { + sslConfig = false; +} else if (sslMode === "prefer") { + // Use connectionString with sslmode parameter instead + sslConfig = "prefer"; +} const pool = new Pool({ connectionString: process.env.DATABASE_URL, - ssl: sslConfig, + ssl: sslConfig === "prefer" ? 
false : sslConfig, }); const db = drizzle(pool); diff --git a/app/src/app/api/metrics/route.ts b/app/src/app/api/metrics/route.ts new file mode 100644 index 0000000..6904804 --- /dev/null +++ b/app/src/app/api/metrics/route.ts @@ -0,0 +1,53 @@ +import { NextResponse } from 'next/server'; +import { metricsCollector } from '@/lib/metrics'; + +/** + * GET /api/metrics + * Returns collected frontend metrics in Prometheus format + */ +export async function GET() { + try { + const prometheusFormat = metricsCollector.exportAsPrometheus(); + return new NextResponse(prometheusFormat, { + status: 200, + headers: { + 'Content-Type': 'text/plain; version=0.0.4; charset=utf-8', + }, + }); + } catch (error) { + console.error('Error generating frontend metrics:', error); + return NextResponse.json( + { error: 'Failed to generate metrics' }, + { status: 500 } + ); + } +} + +/** + * GET /api/metrics/summary + * Returns a JSON summary of frontend metrics + */ +export async function POST(request: Request) { + try { + const { action } = await request.json(); + + if (action === 'clear') { + metricsCollector.clear(); + return NextResponse.json({ + message: 'Metrics cleared', + success: true, + }); + } + + return NextResponse.json( + { error: 'Unknown action' }, + { status: 400 } + ); + } catch (error) { + console.error('Error processing metrics request:', error); + return NextResponse.json( + { error: 'Failed to process request' }, + { status: 500 } + ); + } +} diff --git a/app/src/app/api/profile/summary/route.ts b/app/src/app/api/profile/summary/route.ts index 536955f..fb02bbd 100644 --- a/app/src/app/api/profile/summary/route.ts +++ b/app/src/app/api/profile/summary/route.ts @@ -3,59 +3,117 @@ import { db } from "@/db"; import { transcriptions, subscriptionTable, userTable } from "@/db/schema"; import { validateRequest } from "@/auth"; import { sql, eq } from "drizzle-orm"; +import { createMetricsRecorder } from "@/lib/metrics"; export async function GET() { - const { user } = await 
validateRequest(); + const startTime = Date.now(); + const requestId = Math.random().toString(36).substring(7); + const metricsRecorder = createMetricsRecorder("/api/profile/summary", "GET").start(); + let statusCode = 200; + let userId: string | undefined; + + // Log request + console.log(`\n${'='.repeat(80)}`); + console.log(`[REQUEST ${requestId}] GET /api/profile/summary`); + console.log(`${'='.repeat(80)}`); + console.log(`Timestamp: ${new Date().toISOString()}`); + + try { + const { user } = await validateRequest(); + userId = user?.id; + console.log(`User ID: ${user?.id || 'Anonymous'}`); - if (!user) { - return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } + if (!user) { + statusCode = 401; + console.log(`[RESPONSE ${requestId}] Status: 401 Unauthorized`); + console.log(`Duration: ${Date.now() - startTime}ms`); + console.log(`${'='.repeat(80)}\n`); + metricsRecorder.end(statusCode, userId); + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const userId = user.id; + console.log(`Fetching profile summary for user: ${userId}`); + + // Count total transcriptions for this user + console.log(`Querying total transcriptions count...`); + const totalResult = await db + .select({ count: sql`COUNT(*)` }) + .from(transcriptions) + .where(eq(transcriptions.userID, userId)); + + const totalRecords = totalResult[0]?.count ?? 0; + console.log(`Total records found: ${totalRecords}`); - const userId = user.id; - - // Count total transcriptions for this user - const totalResult = await db - .select({ count: sql`COUNT(*)` }) - .from(transcriptions) - .where(eq(transcriptions.userID, userId)); - - const totalRecords = totalResult[0]?.count ?? 0; - - // Count demo/sample recordings (isDefault = true) - const sampleResult = await db - .select({ count: sql`COUNT(*)` }) - .from(transcriptions) - .where(eq(transcriptions.isDefault, true)); - - const sampleCount = sampleResult[0]?.count ?? 
0; - - const [subscriptionInfo] = await db - .select({ - name: subscriptionTable.name, - recordingCount: subscriptionTable.recordingCount, - fileSizeLimitMB: subscriptionTable.fileSizeLimitMB, - durationDays: subscriptionTable.durationDays, - }) - .from(subscriptionTable) - .innerJoin(userTable, eq(userTable.subscriptionId, subscriptionTable.id)) - .where(eq(userTable.id, userId)); - if (!subscriptionInfo) { + // Count demo/sample recordings (isDefault = true) + console.log(`Querying sample/demo recordings count...`); + const sampleResult = await db + .select({ count: sql`COUNT(*)` }) + .from(transcriptions) + .where(eq(transcriptions.isDefault, true)); + + const sampleCount = sampleResult[0]?.count ?? 0; + console.log(`Sample records found: ${sampleCount}`); + + console.log(`Fetching subscription information...`); + const [subscriptionInfo] = await db + .select({ + name: subscriptionTable.name, + recordingCount: subscriptionTable.recordingCount, + fileSizeLimitMB: subscriptionTable.fileSizeLimitMB, + durationDays: subscriptionTable.durationDays, + }) + .from(subscriptionTable) + .innerJoin(userTable, eq(userTable.subscriptionId, subscriptionTable.id)) + .where(eq(userTable.id, userId)); + + if (!subscriptionInfo) { + statusCode = 500; + console.log(`[RESPONSE ${requestId}] Status: 500 - Subscription not found`); + console.log(`Duration: ${Date.now() - startTime}ms`); + console.log(`${'='.repeat(80)}\n`); + metricsRecorder.end(statusCode, userId, "Subscription not found"); + return NextResponse.json( + { error: "Subscription not found for user" }, + { status: 500 } + ); + } + + const responseData = { + email: user.username, + sampleCount, + totalRecords, + subscription: { + name: subscriptionInfo.name, + limit: subscriptionInfo.recordingCount, + remaining: Math.max(subscriptionInfo.recordingCount - totalRecords, 0), + fileSizeLimitMB: subscriptionInfo.fileSizeLimitMB, + durationDays: subscriptionInfo.durationDays, + }, + }; + + statusCode = 200; + 
console.log(`[RESPONSE ${requestId}] Status: 200 OK`); + console.log(`Response Data:`, JSON.stringify(responseData, null, 2)); + console.log(`Duration: ${Date.now() - startTime}ms`); + console.log(`${'='.repeat(80)}\n`); + + metricsRecorder.end(statusCode, userId); + return NextResponse.json(responseData); + } catch (error) { + statusCode = 500; + const duration = Date.now() - startTime; + const errorMessage = error instanceof Error ? error.message : String(error); + console.error(`\n[ERROR ${requestId}] Exception occurred after ${duration}ms`); + console.error(`Error Type: ${error instanceof Error ? error.constructor.name : 'Unknown'}`); + console.error(`Error Message: ${errorMessage}`); + console.error(`Stack:`, error instanceof Error ? error.stack : 'N/A'); + console.log(`${'='.repeat(80)}\n`); + + metricsRecorder.end(statusCode, userId, errorMessage); return NextResponse.json( - { error: "Subscription not found for user" }, + { error: "Internal server error" }, { status: 500 } ); } - - return NextResponse.json({ - email: user.username, - sampleCount, - totalRecords, - subscription: { - name: subscriptionInfo.name, - limit: subscriptionInfo.recordingCount, - remaining: Math.max(subscriptionInfo.recordingCount - totalRecords, 0), - fileSizeLimitMB: subscriptionInfo.fileSizeLimitMB, - durationDays: subscriptionInfo.durationDays, - }, - }); } diff --git a/app/src/app/metrics/page.tsx b/app/src/app/metrics/page.tsx new file mode 100644 index 0000000..c8c65f0 --- /dev/null +++ b/app/src/app/metrics/page.tsx @@ -0,0 +1,509 @@ +'use client'; + +import { useState, useEffect } from 'react'; +import { AlertCircle, RefreshCw, Copy, Check, Lock, Eye, EyeOff } from 'lucide-react'; + +interface MetricsData { + totalRequests: number; + uniqueEndpoints: Set; + statusCodes: Map; + requestDurations: number[]; + endpointStats: Map; + errorRate: number; +} + +export default function MetricsPage() { + const [metrics, setMetrics] = useState(''); + const [frontendMetrics, 
setFrontendMetrics] = useState(''); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [copied, setCopied] = useState(false); + const [baseUrl, setBaseUrl] = useState('http://localhost:8000'); + const [apiKey, setApiKey] = useState(''); + const [showApiKey, setShowApiKey] = useState(false); + const [autoRefresh, setAutoRefresh] = useState(false); + const [refreshInterval, setRefreshInterval] = useState(5); + const [metricsHistory, setMetricsHistory] = useState<{ timestamp: number; data: MetricsData }[]>([]); + const [activeTab, setActiveTab] = useState<'backend' | 'frontend'>('backend'); + + useEffect(() => { + // Get the microservice URL from env or default to localhost + const envUrl = process.env.NEXT_PUBLIC_MICROSERVICE_URL; + if (envUrl && !envUrl.includes('undefined')) { + // If it's the Docker URL, convert to localhost for browser + setBaseUrl(envUrl.replace('service:8000', 'localhost:8000')); + } else { + // Default to localhost for local development + setBaseUrl('http://localhost:8000'); + } + + // Load saved API key from localStorage + const savedApiKey = localStorage.getItem('prometheus_api_key'); + if (savedApiKey) { + setApiKey(savedApiKey); + } + }, []); + + // Auto-refresh effect + useEffect(() => { + if (!autoRefresh) return; + + const interval = setInterval(() => { + fetchMetrics(); + fetchFrontendMetrics(); + }, refreshInterval * 1000); + + return () => clearInterval(interval); + }, [autoRefresh, refreshInterval, baseUrl, apiKey]); + + const fetchMetrics = async () => { + setLoading(true); + setError(null); + + try { + const metricsUrl = `${baseUrl}/metrics/`; + const headers: Record = {}; + + if (apiKey) { + headers['X-API-Key'] = apiKey; + // Save API key to localStorage + localStorage.setItem('prometheus_api_key', apiKey); + } + + const response = await fetch(metricsUrl, { headers }); + + if (!response.ok) { + throw new Error( + `Failed to fetch metrics: ${response.status} 
${response.statusText}` + ); + } + + const data = await response.text(); + setMetrics(data); + + // Add to history for tracking + const parsed = parseMetricsData(data); + setMetricsHistory(prev => [...prev.slice(-59), { timestamp: Date.now(), data: parsed }]); + } catch (err) { + setError(err instanceof Error ? err.message : 'Unknown error occurred'); + setMetrics(''); + } finally { + setLoading(false); + } + }; + + const fetchFrontendMetrics = async () => { + try { + const response = await fetch('/api/metrics'); + if (!response.ok) { + throw new Error(`Failed to fetch frontend metrics: ${response.status}`); + } + const data = await response.text(); + setFrontendMetrics(data); + } catch (err) { + console.error('Error fetching frontend metrics:', err); + } + }; + + const copyToClipboard = () => { + navigator.clipboard.writeText(metrics); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }; + + const parseMetrics = () => { + const lines = metrics.split('\n').filter(line => !line.startsWith('#') && line.trim()); + return lines; + }; + + const parseMetricsData = (metricsText: string): MetricsData => { + const lines = metricsText.split('\n').filter(line => !line.startsWith('#') && line.trim()); + const data: MetricsData = { + totalRequests: 0, + uniqueEndpoints: new Set(), + statusCodes: new Map(), + requestDurations: [], + endpointStats: new Map(), + errorRate: 0, + }; + + lines.forEach(line => { + // Parse request totals + if (line.includes('lingo_requests_total{')) { + const statusMatch = line.match(/status="(\d+)"/); + const endpointMatch = line.match(/endpoint="([^"]+)"/); + const countMatch = line.match(/\}\s*([\d.]+)$/); + + if (statusMatch && countMatch) { + const status = statusMatch[1]; + const count = parseInt(countMatch[1]); + data.statusCodes.set(status, (data.statusCodes.get(status) || 0) + count); + data.totalRequests += count; + + // Track error rate + if (!status.startsWith('2')) { + data.errorRate += count; + } + } + + if (endpointMatch) 
{ + data.uniqueEndpoints.add(endpointMatch[1]); + } + } + + // Parse request durations + if (line.includes('lingo_request_duration_seconds_sum{')) { + const durationMatch = line.match(/\}\s*([\d.]+)$/); + if (durationMatch) { + data.requestDurations.push(parseFloat(durationMatch[1])); + } + } + + // Parse endpoint statistics + if (line.includes('lingo_requests_total{')) { + const endpointMatch = line.match(/endpoint="([^"]+)"/); + const countMatch = line.match(/\}\s*([\d.]+)$/); + if (endpointMatch && countMatch) { + const endpoint = endpointMatch[1]; + const count = parseInt(countMatch[1]); + const current = data.endpointStats.get(endpoint) || { count: 0, duration: 0 }; + data.endpointStats.set(endpoint, { ...current, count: current.count + count }); + } + } + }); + + // Calculate error rate percentage + if (data.totalRequests > 0) { + data.errorRate = (data.errorRate / data.totalRequests) * 100; + } + + return data; + }; + + const getMetricsSummary = (): MetricsData => { + return parseMetricsData(metrics); + }; + + const summary = metrics ? getMetricsSummary() : null; + + const avgResponseTime = summary && summary.requestDurations.length > 0 + ? (summary.requestDurations.reduce((a, b) => a + b, 0) / summary.requestDurations.length * 1000).toFixed(2) + : 0; + + const requestsPerSecond = metricsHistory.length > 1 + ? ((metricsHistory[metricsHistory.length - 1].data.totalRequests - + metricsHistory[0].data.totalRequests) / ((metricsHistory[metricsHistory.length - 1].timestamp - + metricsHistory[0].timestamp) / 1000)).toFixed(2) + : 0; + + const getStatusColor = (status: string) => { + if (status.startsWith('2')) return 'text-green-400'; + if (status.startsWith('3')) return 'text-blue-400'; + if (status.startsWith('4')) return 'text-yellow-400'; + return 'text-red-400'; + }; + + const getErrorRate = (rate: number) => { + if (rate < 1) return 'text-green-400'; + if (rate < 5) return 'text-yellow-400'; + return 'text-red-400'; + }; + + return ( +
+
+ {/* Header */} +
+

Prometheus Metrics

+

Real-time monitoring dashboard for Lingo AI

+
+ + {/* Config Section */} +
+ {/* Microservice URL */} +
+ +
+ setBaseUrl(e.target.value)} + placeholder="http://localhost:8000" + className="flex-1 px-4 py-2 bg-slate-600 border border-slate-500 rounded text-white placeholder-gray-400 focus:outline-none focus:border-blue-500 text-sm" + onKeyPress={(e) => e.key === 'Enter' && fetchMetrics()} + /> + +
+
+ + {/* API Key Input */} +
+ +
+ setApiKey(e.target.value)} + placeholder="Prometheus API key" + className="flex-1 px-4 py-2 bg-slate-600 border border-slate-500 rounded text-white placeholder-gray-400 focus:outline-none focus:border-blue-500 text-sm" + /> + +
+
+
+ + {/* Auto-refresh Controls */} +
+
+ + {autoRefresh && ( +
+ + +
+ )} +
+
+ {metricsHistory.length > 0 && `History: ${metricsHistory.length} samples`} +
+
+ + {/* Error Display */} + {error && ( +
+ +
+

Error

+

{error}

+
+
+ )} + + {/* Key Metrics Summary */} + {summary && ( +
+ {/* Total Requests */} +
+
Total Requests
+
{summary.totalRequests}
+
+ {requestsPerSecond} req/sec +
+
+ + {/* Unique Endpoints */} +
+
Unique Endpoints
+
{summary.uniqueEndpoints.size}
+
+ {Array.from(summary.uniqueEndpoints).slice(0, 2).map(ep => ( +
{ep}
+ ))} + {summary.uniqueEndpoints.size > 2 && ( +
+{summary.uniqueEndpoints.size - 2} more
+ )} +
+
+ + {/* Avg Response Time */} +
+
Avg Response Time
+
{avgResponseTime}ms
+
milliseconds
+
+ + {/* Success Rate */} +
+
Success Rate
+
+ {(100 - summary.errorRate).toFixed(1)}% +
+
2xx status codes
+
+ + {/* Error Rate */} +
5 ? 'border-red-500' : 'border-slate-600'}`}> +
Error Rate
+
+ {summary.errorRate.toFixed(1)}% +
+
4xx, 5xx codes
+
+
+ )} + + {/* Status Codes Breakdown */} + {summary && ( +
+

HTTP Status Codes

+
+ {Array.from(summary.statusCodes.entries()).map(([status, count]) => ( +
+
{status}
+
{count} requests
+
+ ))} +
+
+ )} + + {/* Endpoints Performance */} + {summary && summary.endpointStats.size > 0 && ( +
+

Endpoint Performance

+
+ {Array.from(summary.endpointStats.entries()) + .sort((a, b) => b[1].count - a[1].count) + .map(([endpoint, stats]) => ( +
+
+
{endpoint}
+
Requests: {stats.count}
+
+
+
+ {stats.count > 0 ? ((stats.duration / stats.count) * 1000).toFixed(0) : 0}ms +
+
+
+ ))} +
+
+ )} + + {/* Tabs */} + {metrics || frontendMetrics ? ( +
+ + +
+ ) : null} + + {/* Metrics Display */} + {activeTab === 'backend' && metrics && ( +
+
+

Raw Backend Metrics Output

+ +
+
+
+                {metrics}
+              
+
+
+ )} + + {/* Frontend Metrics Display */} + {activeTab === 'frontend' && frontendMetrics && ( +
+
+

Raw Frontend Metrics Output

+ +
+
+
+                {frontendMetrics}
+              
+
+
+ )} + + {/* Empty State */} + {!metrics && !error && ( +
+
+ +

Enter your API key and click "Fetch Metrics" to view Prometheus metrics

+
+
+ )} + + {/* Info Footer */} +
+

💡 Note: Metrics endpoint is available at /metrics/ (with trailing slash)

+
+
+
+ ); +} diff --git a/app/src/db/index.ts b/app/src/db/index.ts index 66207b4..d33b0c1 100644 --- a/app/src/db/index.ts +++ b/app/src/db/index.ts @@ -1,22 +1,79 @@ -import { drizzle } from 'drizzle-orm/node-postgres' -import { Pool } from 'pg' -import * as schema from './schema' - +import { drizzle } from 'drizzle-orm/node-postgres'; +import { Pool, QueryConfig, QueryResult } from 'pg'; +import * as schema from './schema'; +import { recordDbQueryMetric } from '@/lib/metrics'; if (!process.env.DATABASE_URL) { - throw new Error("DATABASE_URL is not set"); + throw new Error('DATABASE_URL is not set'); +} + +// SSL configuration - only use for production/AWS RDS +// Development: set DB_SSL_MODE=disable for local PostgreSQL without SSL +// Production: set DB_SSL_MODE=require for AWS RDS +const sslMode = process.env.DB_SSL_MODE || 'prefer'; + +let sslConfig: any = false; +if (sslMode === 'require') { + sslConfig = { + rejectUnauthorized: false, + }; +} else if (sslMode === 'disable') { + sslConfig = false; +} else if (sslMode === 'prefer') { + // For prefer mode, don't set ssl to allow fallback + sslConfig = false; } const pool = new Pool({ connectionString: process.env.DATABASE_URL, - max: 10, // max 10 connections - idleTimeoutMillis: 30000, // idle connections are closed after 30s - connectionTimeoutMillis: 2000, // wait 2s for a connection before failing - ssl: { - // AWS RDS requires SSL but uses self-signed certificates - // This keeps encryption enabled while accepting AWS certificates - rejectUnauthorized: false, - }, -}) + max: 10, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, + ssl: sslConfig, +}); + +function extractOperation(text: string | undefined | null): string { + if (!text) return 'UNKNOWN'; + const first = text.trim().split(/\s+/)[0]?.toUpperCase(); + if (!first) return 'UNKNOWN'; + return first; +} + +const originalQuery = pool.query.bind(pool); + +// Wrap pg Pool#query to record DB timings for all queries (including those via drizzle) 
+// eslint-disable-next-line @typescript-eslint/no-explicit-any +(pool as any).query = async ( + queryTextOrConfig: string | QueryConfig, + values?: any[] +): Promise => { + const start = process.hrtime.bigint(); + let success = true; + let operation = 'UNKNOWN'; + + try { + const sqlText = + typeof queryTextOrConfig === 'string' + ? queryTextOrConfig + : queryTextOrConfig?.text; + operation = extractOperation(sqlText); + + const result = await originalQuery(queryTextOrConfig as any, values); + return result; + } catch (err) { + success = false; + throw err; + } finally { + const end = process.hrtime.bigint(); + const durationMs = Number(end - start) / 1_000_000; + + recordDbQueryMetric({ + operation, + success, + duration: durationMs, + timestamp: Date.now(), + }); + } +}; -export const db = drizzle(pool, { schema }) \ No newline at end of file +export const db = drizzle(pool, { schema }); \ No newline at end of file diff --git a/app/src/lib/metrics.ts b/app/src/lib/metrics.ts new file mode 100644 index 0000000..dc96231 --- /dev/null +++ b/app/src/lib/metrics.ts @@ -0,0 +1,274 @@ +/** + * Frontend Metrics Tracking + * Collects metrics for Next.js API routes and sends them to an in-memory store + */ + +export interface ApiMetric { + endpoint: string; + method: string; + statusCode: number; + duration: number; // in milliseconds + timestamp: number; + requestId: string; + userId?: string; + error?: string; +} + +export interface DbQueryMetric { + operation: string; + success: boolean; + duration: number; // in milliseconds + timestamp: number; +} + +class MetricsCollector { + private metrics: ApiMetric[] = []; + private maxMetrics = 1000; // Keep last 1000 metrics + private dbMetrics: DbQueryMetric[] = []; + private maxDbMetrics = 2000; + + /** + * Record an API metric + */ + recordMetric(metric: ApiMetric) { + this.metrics.push(metric); + + // Keep only the last N metrics to avoid memory bloat + if (this.metrics.length > this.maxMetrics) { + this.metrics = 
this.metrics.slice(-this.maxMetrics); + } + } + + /** + * Record a DB query metric + */ + recordDbMetric(metric: DbQueryMetric) { + this.dbMetrics.push(metric); + + if (this.dbMetrics.length > this.maxDbMetrics) { + this.dbMetrics = this.dbMetrics.slice(-this.maxDbMetrics); + } + } + + /** + * Get all metrics + */ + getAllMetrics(): ApiMetric[] { + return this.metrics; + } + + /** + * Get metrics summary + */ + getSummary() { + if (this.metrics.length === 0) { + return { + totalRequests: 0, + uniqueEndpoints: 0, + averageResponseTime: 0, + errorRate: 0, + statusCodes: {}, + endpointStats: {}, + }; + } + + const statusCodes: Record = {}; + const endpointStats: Record< + string, + { count: number; totalDuration: number; errors: number } + > = {}; + let totalDuration = 0; + let errorCount = 0; + + this.metrics.forEach((metric) => { + // Track status codes + statusCodes[metric.statusCode] = + (statusCodes[metric.statusCode] || 0) + 1; + + // Track endpoint stats + if (!endpointStats[metric.endpoint]) { + endpointStats[metric.endpoint] = { + count: 0, + totalDuration: 0, + errors: 0, + }; + } + endpointStats[metric.endpoint].count++; + endpointStats[metric.endpoint].totalDuration += metric.duration; + + // Count errors + if (metric.statusCode >= 400) { + errorCount++; + endpointStats[metric.endpoint].errors++; + } + + totalDuration += metric.duration; + }); + + const averageResponseTime = Math.round( + totalDuration / this.metrics.length + ); + const errorRate = Math.round((errorCount / this.metrics.length) * 100); + + return { + totalRequests: this.metrics.length, + uniqueEndpoints: Object.keys(endpointStats).length, + averageResponseTime, + errorRate, + statusCodes, + endpointStats: Object.entries(endpointStats).map( + ([endpoint, stats]) => ({ + endpoint, + count: stats.count, + averageDuration: Math.round(stats.totalDuration / stats.count), + errorCount: stats.errors, + }) + ), + }; + } + + /** + * Clear all metrics + */ + clear() { + this.metrics = []; + } + + /** 
+ * Export metrics as Prometheus-compatible format + */ + exportAsPrometheus(): string { + let output = ''; + + // Request count by endpoint and status + output += '# HELP lingo_frontend_requests_total Total API requests\n'; + output += '# TYPE lingo_frontend_requests_total counter\n'; + + const endpointStatusMap: Record> = {}; + this.metrics.forEach((metric) => { + if (!endpointStatusMap[metric.endpoint]) { + endpointStatusMap[metric.endpoint] = {}; + } + endpointStatusMap[metric.endpoint][metric.statusCode] = + (endpointStatusMap[metric.endpoint][metric.statusCode] || 0) + 1; + }); + + Object.entries(endpointStatusMap).forEach(([endpoint, statuses]) => { + Object.entries(statuses).forEach(([status, count]) => { + output += `lingo_frontend_requests_total{endpoint="${endpoint}",method="*",status="${status}"} ${count}\n`; + }); + }); + + // Response time histogram + output += '# HELP lingo_frontend_request_duration_seconds Request duration\n'; + output += '# TYPE lingo_frontend_request_duration_seconds histogram\n'; + + const durations = this.metrics.map((m) => m.duration / 1000); // Convert to seconds + const buckets = [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5]; + + this.metrics.forEach((metric) => { + const durationSeconds = metric.duration / 1000; + output += `lingo_frontend_request_duration_seconds_sum{endpoint="${metric.endpoint}",method="*"} ${durations.reduce((a, b) => a + b, 0)}\n`; + + buckets.forEach((bucket) => { + const count = durations.filter((d) => d <= bucket).length; + output += `lingo_frontend_request_duration_seconds_bucket{endpoint="${metric.endpoint}",le="${bucket}",method="*"} ${count}\n`; + }); + }); + + output += `lingo_frontend_request_duration_seconds_bucket{endpoint="*",le="+Inf",method="*"} ${this.metrics.length}\n`; + output += `lingo_frontend_request_duration_seconds_count{endpoint="*",method="*"} ${this.metrics.length}\n`; + + // DB query metrics + output += '# HELP lingo_frontend_db_queries_total Total DB queries from 
frontend\n'; + output += '# TYPE lingo_frontend_db_queries_total counter\n'; + + const dbCountByOpAndSuccess: Record> = {}; + this.dbMetrics.forEach((m) => { + const op = m.operation || 'UNKNOWN'; + const successKey = m.success ? 'true' : 'false'; + if (!dbCountByOpAndSuccess[op]) { + dbCountByOpAndSuccess[op] = {}; + } + dbCountByOpAndSuccess[op][successKey] = + (dbCountByOpAndSuccess[op][successKey] || 0) + 1; + }); + + Object.entries(dbCountByOpAndSuccess).forEach(([operation, bySuccess]) => { + Object.entries(bySuccess).forEach(([success, count]) => { + output += `lingo_frontend_db_queries_total{operation="${operation}",success="${success}"} ${count}\n`; + }); + }); + + // Simple DB query duration histogram (no labels other than operation) + output += '# HELP lingo_frontend_db_query_duration_seconds DB query duration\n'; + output += '# TYPE lingo_frontend_db_query_duration_seconds histogram\n'; + + const dbDurationsSeconds = this.dbMetrics.map((m) => m.duration / 1000); + const dbBuckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1]; + + const bucketCounts: Record = {}; + dbBuckets.forEach((b) => (bucketCounts[b] = 0)); + bucketCounts['Inf'] = 0; + + dbDurationsSeconds.forEach((d) => { + let placed = false; + for (const b of dbBuckets) { + if (d <= b) { + bucketCounts[b] = (bucketCounts[b] || 0) + 1; + placed = true; + break; + } + } + if (!placed) { + bucketCounts['Inf'] = (bucketCounts['Inf'] || 0) + 1; + } + }); + + dbBuckets.forEach((b) => { + output += `lingo_frontend_db_query_duration_seconds_bucket{le="${b}"} ${bucketCounts[b] || 0}\n`; + }); + output += `lingo_frontend_db_query_duration_seconds_bucket{le="+Inf"} ${bucketCounts['Inf'] || 0}\n`; + const dbSum = dbDurationsSeconds.reduce((a, b) => a + b, 0); + output += `lingo_frontend_db_query_duration_seconds_sum ${dbSum}\n`; + output += `lingo_frontend_db_query_duration_seconds_count ${this.dbMetrics.length}\n`; + + return output; + } +} + +// Global singleton instance +export const 
metricsCollector = new MetricsCollector(); + +/** + * Helper function to record API metrics from route handlers + */ +export function createMetricsRecorder(endpoint: string, method: string) { + return { + start() { + const startTime = Date.now(); + const requestId = Math.random().toString(36).substring(7); + + return { + end(statusCode: number, userId?: string, error?: string) { + const duration = Date.now() - startTime; + + metricsCollector.recordMetric({ + endpoint, + method, + statusCode, + duration, + timestamp: Date.now(), + requestId, + userId, + error, + }); + }, + }; + }, + }; +} + +export function recordDbQueryMetric(metric: DbQueryMetric) { + metricsCollector.recordDbMetric(metric); +} diff --git a/docker-compose.yml b/docker-compose.yml index 132c83f..037a6f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,8 @@ services: depends_on: - fastapi restart: unless-stopped + networks: + - lingo-network fastapi: container_name: service @@ -24,6 +26,8 @@ services: ports: - "8000:8000" restart: unless-stopped + networks: + - lingo-network redis: container_name: lingo-redis @@ -31,3 +35,45 @@ services: restart: always ports: - "6379:6379" + networks: + - lingo-network + + prometheus: + container_name: prometheus + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-storage:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=30d' + restart: unless-stopped + networks: + - lingo-network + + grafana: + container_name: grafana + image: grafana/grafana:latest + ports: + - "3001:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_INSTALL_PLUGINS=grafana-piechart-panel + volumes: + - grafana-storage:/var/lib/grafana + depends_on: + - prometheus + restart: unless-stopped + networks: + - lingo-network + +networks: + lingo-network: + driver: bridge + 
+volumes: + prometheus-storage: + grafana-storage: diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml new file mode 100644 index 0000000..4d7d56d --- /dev/null +++ b/prometheus/prometheus.yml @@ -0,0 +1,27 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + monitor: 'lingo-ai-monitor' + +scrape_configs: + # FastAPI Backend Service + - job_name: 'lingo-backend' + static_configs: + - targets: ['service:8000'] + metrics_path: '/metrics/' + scrape_interval: 5s + scrape_timeout: 5s + + # Next.js Frontend Service + - job_name: 'lingo-frontend' + static_configs: + - targets: ['app:3000'] + metrics_path: '/api/metrics' + scrape_interval: 5s + scrape_timeout: 5s + + # Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] diff --git a/service/.env.example b/service/.env.example index 410a221..8f56106 100644 --- a/service/.env.example +++ b/service/.env.example @@ -19,7 +19,13 @@ DB_PASSWORD=password DB_HOST=localhost DB_PORT=5432 DB_NAME=lingo +# SSL mode: 'prefer' (dev - tries SSL, falls back), 'require' (prod - AWS RDS), 'disable' (local dev) +DB_SSL_MODE=prefer # Zaban STT/TTS API (optional; used for upload STT and TTS) ZABAN_BASE_URL=http://localhost:8000 ZABAN_API_KEY= + +# Prometheus Metrics Configuration +PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus +PROMETHEUS_API_KEY=your-secure-prometheus-api-key-here diff --git a/service/Dockerfile b/service/Dockerfile index 67ed8f2..412dd31 100644 --- a/service/Dockerfile +++ b/service/Dockerfile @@ -35,7 +35,9 @@ COPY . . 
# Create non-root user for security RUN useradd -m -u 1000 appuser && \ - chown -R appuser:appuser /lingo/service + chown -R appuser:appuser /lingo/service && \ + mkdir -p /tmp/prometheus && \ + chmod 777 /tmp/prometheus USER appuser EXPOSE 8000 diff --git a/service/banking/database.py b/service/banking/database.py index 6a3495e..d769979 100644 --- a/service/banking/database.py +++ b/service/banking/database.py @@ -1,9 +1,23 @@ -from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey, DateTime, Text, Boolean, Index +from sqlalchemy import ( + create_engine, + Column, + Integer, + String, + Float, + ForeignKey, + DateTime, + Text, + Boolean, + Index, + event, +) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, relationship import datetime import os import sys +import time +from prometheus_client import Counter, Histogram # Add the parent directory to sys.path sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -16,24 +30,85 @@ DB_PORT = config.db_port DB_NAME = config.db_name +SQLALCHEMY_DATABASE_URL = ( + f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" +) -SQLALCHEMY_DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" +# Prometheus metrics for database queries +db_query_count = Counter( + "lingo_db_queries_total", + "Total database queries", + ["operation", "success"], +) + +db_query_duration = Histogram( + "lingo_db_query_duration_seconds", + "Database query duration in seconds", + ["operation", "success"], +) -# Create PostgreSQL engine with SSL for AWS RDS -# AWS RDS requires SSL - this configuration accepts AWS certificates + +def _get_sql_operation(statement: str) -> str: + """ + Best-effort extraction of the SQL verb (SELECT/INSERT/UPDATE/DELETE/...). + Keeps label cardinality low by grouping by operation only. 
+ """ + if not statement: + return "UNKNOWN" + first_token = statement.strip().split()[0].upper() + if first_token in {"SELECT", "INSERT", "UPDATE", "DELETE"}: + return first_token + return first_token or "UNKNOWN" + + +# Create PostgreSQL engine +# For production (AWS RDS): use sslmode=require +# For development: use sslmode=prefer (tries SSL but falls back if unavailable) engine = create_engine( SQLALCHEMY_DATABASE_URL, connect_args={ - "sslmode": "require", - } + "sslmode": os.getenv("DB_SSL_MODE", "prefer"), + }, ) + +@event.listens_for(engine, "before_cursor_execute") +def before_cursor_execute( + conn, cursor, statement, parameters, context, executemany +): # pragma: no cover - instrumentation + context._query_start_time = time.time() + context._query_operation = _get_sql_operation(statement) + + +@event.listens_for(engine, "after_cursor_execute") +def after_cursor_execute( + conn, cursor, statement, parameters, context, executemany +): # pragma: no cover - instrumentation + start_time = getattr(context, "_query_start_time", None) + operation = getattr(context, "_query_operation", _get_sql_operation(statement)) + + if start_time is None: + return + + duration = time.time() - start_time + db_query_count.labels(operation=operation, success="true").inc() + db_query_duration.labels(operation=operation, success="true").observe(duration) + + +@event.listens_for(engine, "handle_error") +def handle_error(exception_context): # pragma: no cover - instrumentation + statement = getattr(exception_context, "statement", "") or "" + operation = _get_sql_operation(statement) + db_query_count.labels(operation=operation, success="false").inc() + + # Create a SessionLocal class for database session SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) # Create a Base class for declarative models Base = declarative_base() + # Dependency to get DB session def get_db(): db = SessionLocal() diff --git a/service/logger.py b/service/logger.py index 
import logging
import sys
from datetime import datetime  # NOTE(review): unused here; kept in case other modules import it via this file — confirm and drop


# Module-level logger for the lingo service, with verbose formatting:
# timestamp, logger name, level, and message.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Single stdout handler, shared with the root logger configured below.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.DEBUG)

formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
console_handler.setFormatter(formatter)

if not logger.handlers:
    logger.addHandler(console_handler)

# Fix: without this, every record emitted through `logger` is printed twice —
# once by its own handler and once more after propagating to the root logger,
# which is given the very same handler below.
logger.propagate = False

# Also configure the root logger so loggers created in other modules
# (which have no handler of their own) share the same output format.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
if not root_logger.handlers:
    root_logger.addHandler(console_handler)
make_asgi_app, CollectorRegistry, multiprocess, generate_latest +from prometheus_client import REGISTRY + +load_dotenv() +PROMETHEUS_API_KEY = os.getenv("PROMETHEUS_API_KEY", "") + +# Ensure Prometheus temp directory exists if using multiprocess mode +prometheus_multiproc_dir = os.getenv("PROMETHEUS_MULTIPROC_DIR") +if prometheus_multiproc_dir and not os.path.exists(prometheus_multiproc_dir): + os.makedirs(prometheus_multiproc_dir, exist_ok=True) app = FastAPI() +# Initialize basic Prometheus metrics +request_count = Counter('lingo_requests_total', 'Total requests', ['endpoint', 'method', 'status']) +request_duration = Histogram('lingo_request_duration_seconds', 'Request duration in seconds', ['endpoint', 'method']) + # Add CORS middleware to the application app.add_middleware( CORSMiddleware, @@ -31,10 +46,118 @@ allow_headers=["*"], # Allows all headers ) +# Middleware to track metrics and log detailed request/response info +@app.middleware("http") +async def detailed_logging_middleware(request, call_next): + import time + import json + + start_time = time.time() + request_id = str(os.urandom(8).hex()) + + # Skip detailed logging for metrics endpoint (to avoid noise), but still track metrics + skip_logging = request.url.path == "/metrics" or request.url.path == "/metrics/" + + # Log request details (skip for /metrics to avoid noise) + if not skip_logging: + logger.info(f"\n{'='*80}") + logger.info(f"[REQUEST {request_id}] {request.method} {request.url.path}") + logger.info(f"{'='*80}") + + # Log basic request info (skip for /metrics) + if not skip_logging: + logger.info(f"URL: {request.url}") + logger.info(f"Client: {request.client.host if request.client else 'Unknown'}:{request.client.port if request.client else 'Unknown'}") + logger.info(f"Method: {request.method}") + logger.info(f"Path: {request.url.path}") + logger.info(f"Query Params: {dict(request.query_params)}") + + # Log headers (excluding sensitive ones) + headers_to_log = {} + sensitive_headers = 
{'authorization', 'x-api-key', 'cookie', 'password'} + for key, value in request.headers.items(): + if key.lower() not in sensitive_headers: + headers_to_log[key] = value + logger.info(f"Headers: {headers_to_log}") + + # Log body for POST/PUT/PATCH requests + if request.method in ["POST", "PUT", "PATCH"]: + try: + body = await request.body() + if body: + try: + body_json = json.loads(body) + logger.info(f"Body: {json.dumps(body_json, indent=2)}") + except: + logger.info(f"Body (raw): {body[:500]}") + except Exception as e: + logger.warning(f"Failed to read request body: {e}") + + try: + response = await call_next(request) + duration = time.time() - start_time + + # Log response details (skip for /metrics) + if not skip_logging: + logger.info(f"\n[RESPONSE {request_id}] Status: {response.status_code}") + logger.info(f"Duration: {duration:.3f}s") + + # Safely convert headers to dict + try: + response_headers = dict(response.headers) + except: + response_headers = dict(response.headers.raw) if hasattr(response.headers, 'raw') else {} + logger.info(f"Response Headers: {response_headers}") + + # Record metrics for all endpoints (including /metrics) + try: + endpoint = request.url.path + method = request.method + status = response.status_code + + request_count.labels(endpoint=endpoint, method=method, status=status).inc() + request_duration.labels(endpoint=endpoint, method=method).observe(duration) + except Exception as e: + logger.warning(f"Failed to record metrics: {e}") + + if not skip_logging: + logger.info(f"{'='*80}\n") + return response + + except Exception as e: + duration = time.time() - start_time + if not skip_logging: + logger.error(f"\n[ERROR {request_id}] Exception occurred after {duration:.3f}s") + logger.error(f"Error Type: {type(e).__name__}") + logger.error(f"Error Message: {str(e)}") + logger.error(f"Traceback: {traceback.format_exc()}") + + try: + request_count.labels(endpoint=request.url.path, method=request.method, status=500).inc() + 
@app.get("/")
def root_route():
    # Pre-existing root route, reproduced unchanged.
    return 'Hello, this is the root route for lingo ai server'


@app.get("/health")
def health_check():
    """Liveness probe for container orchestration and monitoring."""
    return {"status": "healthy", "timestamp": datetime.now().isoformat()}


@app.post("/test-log")
def test_logging(data: dict = None):
    """Manual smoke-test endpoint for the detailed request logging."""
    logger.info(f"Test endpoint called with data: {data}")
    return {"message": "Test successful", "received": data}


class Body(BaseModel):
    audio_file_link: str


# Prometheus scrape endpoint, registered under both path spellings so a
# scrape config using '/metrics' or '/metrics/' works either way.
# NOTE(review): PROMETHEUS_API_KEY is read at startup but never enforced
# here, and the bundled prometheus.yml sends no credentials — confirm
# whether authentication is intended before exposing this publicly.
@app.get("/metrics")
@app.get("/metrics/")
def get_metrics_handler():
    """Serve collected metrics in the Prometheus text exposition format."""
    try:
        multiproc_dir = os.getenv("PROMETHEUS_MULTIPROC_DIR")
        if multiproc_dir:
            # Multi-worker (Gunicorn) mode: aggregate the per-process
            # metric files written under PROMETHEUS_MULTIPROC_DIR.
            registry = CollectorRegistry()
            multiprocess.MultiProcessCollector(registry)
        else:
            # Single-process development mode: use the global default registry.
            registry = REGISTRY
        return Response(generate_latest(registry), media_type="text/plain; version=0.0.4")
    except Exception:
        logger.error(f"Error generating metrics: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail="Failed to generate metrics")
5edcda6..1b821bc 100644 --- a/service/requirements.txt +++ b/service/requirements.txt @@ -30,4 +30,5 @@ sentencepiece ninja sqlalchemy psycopg2-binary -redis \ No newline at end of file +redis +prometheus-client