diff --git a/CLAUDE.md b/CLAUDE.md index 8bcdcef90..f5d6c64f4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -177,6 +177,18 @@ pnpm --filter @roast/web run test:ci # MUST actually run, not assume # TypeScript compiles ≠ tests pass ``` +### Development Workflow: Modify → Check Loop + +When making code changes, especially to internal packages (`@roast/ai`, `@roast/db`, `@roast/domain`, `@roast/jobs`), follow this verification loop before committing: + +1. **After modifying internal packages**: Run `pnpm turbo run typecheck` (not just `pnpm --filter @roast/web typecheck`). Turbo handles the dependency graph—it rebuilds packages first, then typechecks consumers with fresh `.d.ts` files. This mimics CI's clean-build behavior. + +2. **After modifying web app only**: Run `pnpm --filter @roast/web run typecheck && pnpm --filter @roast/web run lint`. + +3. **Before pushing**: Always run the full check: `pnpm turbo run typecheck lint --parallel`. This catches cross-package type errors that per-package checks miss due to stale `dist/` folders. + +4. **Why this matters**: TypeScript project references use compiled `dist/` for type resolution. Local dev accumulates stale builds; CI starts fresh. If you see typecheck errors and assume they're "pre-existing," verify by rebuilding the source package first (`pnpm --filter @roast/ai run build`). + ## Commands Quick Reference ### Development @@ -186,6 +198,30 @@ pnpm --filter @roast/db run gen # Generate Prisma client pnpm --filter @roast/db run db:push # Push schema changes ``` +### Dev Environment & Database Access (Primary) + +**🚨 MANDATORY: Use ONLY these scripts for database access - NO EXCEPTIONS 🚨** + +```bash +dev/scripts/dev-env.sh start|stop|status|attach|restart # Manage tmux dev session +dev/scripts/dev-env.sh psql [args] # Connect to local DB via Docker +``` + +**FORBIDDEN database access methods** (DO NOT USE): +```bash +❌ psql -h localhost ... # Direct psql - FORBIDDEN +❌ PGPASSWORD=... psql ... 
# Direct psql with password - FORBIDDEN +❌ docker run ... postgres psql # Docker-based psql - FORBIDDEN +❌ Any other method # If it's not dev-env.sh psql, DON'T USE IT +``` + +**Database utilities** (Docker-based, no local psql needed): +- `dev/scripts/dev/db/lib/db_functions.sh` - Core DB functions (`psql_local`, `psql_prod`, `pg_dump_prod`, `copy_data`) +- `dev/scripts/dev/db/lib/common_utils.sh` - Shared bash utilities +- `dev/scripts/dev/db/setup_db.sh` - Example: sync prod schema to local + +**AI agents MUST use `dev/scripts/dev-env.sh psql` - no alternatives allowed.** + ### Testing ```bash # Test categories by cost/dependencies: @@ -203,6 +239,29 @@ pnpm --filter @roast/web run typecheck # TypeScript # MUST run both - linter doesn't catch type errors! ``` +### 🚨 Type Definitions: NO INLINE TYPES 🚨 + +**NEVER use inline types.** Always define named interfaces or type aliases. + +```typescript +// ❌ WRONG - inline types +function Foo({ data }: { data: string; count: number }) { } +const [state, setState] = useState<{ loading: boolean; error?: string }>(); +let result: { success: boolean; value: number }; + +// ✅ CORRECT - named types +interface FooProps { data: string; count: number; } +function Foo({ data }: FooProps) { } + +interface LoadingState { loading: boolean; error?: string; } +const [state, setState] = useState<LoadingState>(); + +interface Result { success: boolean; value: number; } +let result: Result; +``` + +**Why:** Inline types hurt readability, reusability, and refactoring. Named types are self-documenting and can be exported/shared. + ## MCP Server Quick Fix **Problem**: Claude Code caches MCP servers, changes don't take effect @@ -278,6 +337,22 @@ Details here" /bin/rm, /bin/cat, /bin/echo # Use full paths ``` +## Tmux Key Sending + +When sending multiple keystrokes to tmux sessions (e.g., navigating CLI menus), use a loop with delays between keystrokes instead of sending them all at once. 
/**
 * DELETE /api/monitor/lab/baselines/[id]
 *
 * Removes one validation baseline. The caller must be authenticated and must
 * pass the admin check; otherwise the matching `commonErrors` response is
 * returned and nothing is deleted.
 *
 * @param request - Incoming request, used only for authentication.
 * @param params - Route params promise resolving to the baseline `id`.
 * @returns `{ success: true }` on success, or a server-error response when the
 *   repository call throws (the error is logged first).
 */
export async function DELETE(
  request: NextRequest,
  { params }: RouteIdParams
) {
  const userId = await authenticateRequest(request);
  if (!userId) {
    return commonErrors.unauthorized();
  }

  const callerIsAdmin = await isAdmin();
  if (!callerIsAdmin) {
    return commonErrors.forbidden();
  }

  // Route params are awaited only after both access checks have passed.
  const { id: baselineId } = await params;

  try {
    await metaEvaluationRepository.deleteValidationBaseline(baselineId);
    return NextResponse.json({ success: true });
  } catch (error) {
    logger.error("Error deleting baseline:", error);
    return commonErrors.serverError("Failed to delete baseline");
  }
}
/**
 * GET /api/monitor/lab/baselines?agentId=...
 *
 * Lists the validation baselines stored for one agent. The caller must be
 * authenticated and pass the admin check. A missing `agentId` query parameter
 * produces a 400 response.
 *
 * @param request - Incoming request carrying the `agentId` query parameter.
 * @returns `{ baselines }` on success, or a server-error response when the
 *   repository call throws (the error is logged first).
 */
export async function GET(request: NextRequest) {
  const userId = await authenticateRequest(request);
  if (!userId) {
    return commonErrors.unauthorized();
  }

  const callerIsAdmin = await isAdmin();
  if (!callerIsAdmin) {
    return commonErrors.forbidden();
  }

  const { searchParams } = request.nextUrl;
  const agentId = searchParams.get("agentId");
  if (!agentId) {
    return NextResponse.json({ error: "agentId is required" }, { status: 400 });
  }

  try {
    const baselines = await metaEvaluationRepository.getValidationBaselines(agentId);
    return NextResponse.json({ baselines });
  } catch (error) {
    logger.error("Error fetching baselines:", error);
    return commonErrors.serverError("Failed to fetch baselines");
  }
}
/** Row count used when the `limit` query parameter is missing or unusable. */
const DEFAULT_CORPUS_LIMIT = 500;

/**
 * Turns the raw `limit` query value into a count that is safe to forward.
 *
 * `parseInt` returns NaN for non-numeric text and happily accepts negative
 * numbers; both are replaced by the default so the repository never receives
 * a nonsensical limit.
 *
 * @param raw - Value of the `limit` query parameter, or null when absent.
 * @returns A non-negative integer.
 */
function parseCorpusLimit(raw: string | null): number {
  if (!raw) return DEFAULT_CORPUS_LIMIT;
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) || parsed < 0 ? DEFAULT_CORPUS_LIMIT : parsed;
}

/**
 * GET /api/monitor/lab/corpus?agentId=...&filter=...&limit=...
 *
 * Returns the validation corpus documents for one agent. The caller must be
 * authenticated and pass the admin check.
 *
 * Query params:
 * - agentId: required; a 400 response is returned when it is missing
 * - filter: optional, forwarded to the repository as-is
 * - limit: optional row count (default 500); malformed or negative values fall
 *   back to the default
 *
 * @param request - Incoming request carrying the query parameters above.
 * @returns `{ documents }` on success, or a server-error response when the
 *   repository call throws (the error is logged first).
 */
export async function GET(request: NextRequest) {
  const userId = await authenticateRequest(request);
  if (!userId) return commonErrors.unauthorized();

  const adminCheck = await isAdmin();
  if (!adminCheck) return commonErrors.forbidden();

  const { searchParams } = request.nextUrl;
  const agentId = searchParams.get("agentId");
  // An empty `filter=` is treated the same as no filter at all.
  const filter = searchParams.get("filter") || undefined;
  const limit = parseCorpusLimit(searchParams.get("limit"));

  if (!agentId) {
    return NextResponse.json({ error: "agentId is required" }, { status: 400 });
  }

  try {
    const documents = await metaEvaluationRepository.getValidationCorpusDocuments(agentId, {
      filter,
      limit,
    });
    return NextResponse.json({ documents });
  } catch (error) {
    logger.error("Error fetching corpus documents:", error);
    return commonErrors.serverError("Failed to fetch corpus documents");
  }
}
/**
 * GET /api/monitor/lab/corpus/versions?agentId=...&documentId=...&beforeDate=...
 *
 * Lists the evaluation versions one agent produced for one document, newest
 * first. The caller must be authenticated and pass the admin check.
 *
 * Query params:
 * - agentId, documentId: required; a 400 response is returned when either is
 *   missing
 * - beforeDate: optional cutoff; only versions created strictly before it are
 *   returned. A value that does not parse as a date is rejected with a 400
 *   response instead of being handed to the database query.
 *
 * @param request - Incoming request carrying the query parameters above.
 * @returns `{ versions }` with `createdAt` serialized as an ISO string, or a
 *   server-error response when the query throws (the error is logged first).
 */
export async function GET(request: NextRequest) {
  const userId = await authenticateRequest(request);
  if (!userId) return commonErrors.unauthorized();

  const adminCheck = await isAdmin();
  if (!adminCheck) return commonErrors.forbidden();

  const { searchParams } = request.nextUrl;
  const agentId = searchParams.get("agentId");
  const documentId = searchParams.get("documentId");
  const beforeDate = searchParams.get("beforeDate");

  if (!agentId || !documentId) {
    return NextResponse.json(
      { error: "agentId and documentId are required" },
      { status: 400 }
    );
  }

  // `new Date("garbage")` yields an Invalid Date rather than throwing, so the
  // timestamp has to be checked explicitly before it reaches the query.
  let createdBefore: Date | undefined;
  if (beforeDate) {
    createdBefore = new Date(beforeDate);
    if (Number.isNaN(createdBefore.getTime())) {
      return NextResponse.json(
        { error: "beforeDate must be a valid date" },
        { status: 400 }
      );
    }
  }

  try {
    const versions = await prisma.evaluationVersion.findMany({
      where: {
        agentId,
        evaluation: { documentId },
        ...(createdBefore ? { createdAt: { lt: createdBefore } } : {}),
      },
      orderBy: { createdAt: "desc" },
      select: {
        id: true,
        createdAt: true,
        grade: true,
        summary: true,
        version: true,
      },
    });

    return NextResponse.json({
      versions: versions.map((v) => ({
        id: v.id,
        createdAt: v.createdAt.toISOString(),
        grade: v.grade,
        summary: v.summary,
        version: v.version,
      })),
    });
  } catch (error) {
    logger.error("Error fetching evaluation versions:", error);
    return commonErrors.serverError("Failed to fetch evaluation versions");
  }
}
/** Number of evaluations returned when `limit` is absent or unusable. */
const DEFAULT_EVALUATION_LIMIT = 20;

/** Upper bound applied to `limit`, whatever the caller asks for. */
const MAX_EVALUATION_LIMIT = 100;

/**
 * Turns the raw `limit` query value into a value that is safe to use as `take`.
 *
 * Non-numeric and negative values fall back to the default; anything above
 * the maximum is clamped to it.
 *
 * @param raw - Value of the `limit` query parameter, or null when absent.
 * @returns An integer between 0 and `MAX_EVALUATION_LIMIT`.
 */
function parseEvaluationLimit(raw: string | null): number {
  if (!raw) return DEFAULT_EVALUATION_LIMIT;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) return DEFAULT_EVALUATION_LIMIT;
  return Math.min(parsed, MAX_EVALUATION_LIMIT);
}

/**
 * GET /api/monitor/lab/evaluations
 *
 * Fetches recent evaluations with their pipeline telemetry for the Lab UI.
 * Admin-only endpoint.
 *
 * Query params:
 * - limit: number of evaluations to fetch (default 20, max 100); malformed or
 *   negative values fall back to the default
 * - agentId: optional filter by agent ID
 *
 * @param request - Incoming request carrying the query parameters above.
 * @returns `{ evaluations }` on success, or a server-error response when the
 *   query throws (the error is logged first).
 */
export async function GET(request: NextRequest) {
  const userId = await authenticateRequest(request);
  if (!userId) {
    return commonErrors.unauthorized();
  }

  const adminCheck = await isAdmin();
  if (!adminCheck) {
    return commonErrors.forbidden();
  }

  const { searchParams } = new URL(request.url);
  const limit = parseEvaluationLimit(searchParams.get("limit"));
  const agentId = searchParams.get("agentId");

  try {
    // Only versions that actually recorded telemetry are of interest here.
    const where: Prisma.EvaluationVersionWhereInput = {
      pipelineTelemetry: { not: Prisma.JsonNull },
      ...(agentId && { evaluation: { agentId } }),
    };

    // Get recent evaluation versions with telemetry
    const versions = await prisma.evaluationVersion.findMany({
      where,
      take: limit,
      orderBy: { createdAt: "desc" },
      select: {
        id: true,
        version: true,
        grade: true,
        summary: true,
        createdAt: true,
        pipelineTelemetry: true,
        evaluation: {
          select: {
            id: true,
            document: {
              select: {
                id: true,
                // Latest document version only; used for the title.
                versions: {
                  select: { title: true },
                  orderBy: { version: "desc" as const },
                  take: 1,
                },
              },
            },
            agent: {
              select: {
                id: true,
                // Latest agent version only; used for the name.
                versions: {
                  select: { name: true },
                  orderBy: { version: "desc" as const },
                  take: 1,
                },
              },
            },
          },
        },
        comments: {
          select: {
            id: true,
            header: true,
            description: true,
            importance: true,
            highlight: {
              select: {
                quotedText: true,
              },
            },
          },
        },
      },
    });

    // Transform to Lab UI format
    const evaluations = versions.map((v) => {
      // Telemetry is stored as JSON; it is read here as a loose key/value bag.
      const telemetry = v.pipelineTelemetry as Record<string, unknown> | null;

      return {
        id: v.id,
        evaluationId: v.evaluation.id,
        version: v.version,
        grade: v.grade,
        summary: v.summary,
        createdAt: v.createdAt.toISOString(),
        documentId: v.evaluation.document.id,
        documentTitle: v.evaluation.document.versions[0]?.title || "Untitled",
        agentId: v.evaluation.agent.id,
        agentName: v.evaluation.agent.versions[0]?.name || "Unknown Agent",
        comments: v.comments.map((c) => ({
          id: c.id,
          header: c.header,
          description: c.description,
          importance: c.importance,
          quotedText: c.highlight.quotedText || "",
        })),
        // Telemetry data (matches ComparisonData structure)
        telemetry: telemetry ? {
          stages: telemetry.stages,
          extractionPhase: telemetry.extractionPhase,
          filteredItems: telemetry.filteredItems,
          passedItems: telemetry.passedItems,
          // Stored as `finalCounts`, exposed to the UI as `pipelineCounts`.
          pipelineCounts: telemetry.finalCounts,
          totalDurationMs: telemetry.totalDurationMs,
          totalCostUsd: telemetry.totalCostUsd,
          documentLength: telemetry.documentLength,
          profileInfo: telemetry.profileInfo,
        } : null,
      };
    });

    return NextResponse.json({ evaluations });
  } catch (error) {
    logger.error("Error fetching evaluations with telemetry:", error);
    return commonErrors.serverError("Failed to fetch evaluations");
  }
}
/** Characters allowed inside one `/`-separated segment of a model id. */
const MODEL_ID_SEGMENT_PATTERN = /^[A-Za-z0-9._:-]+$/;

/**
 * Checks that a model id is safe to splice into the upstream URL path.
 *
 * Every `/`-separated segment must be non-empty, must consist only of
 * characters from `MODEL_ID_SEGMENT_PATTERN`, and must not be `.` or `..`.
 * That rules out path traversal as well as `?`/`#` injection into the
 * upstream request.
 *
 * @param modelId - Raw value of the `model` query parameter.
 * @returns true when the id can be used verbatim as a URL path.
 */
function isSafeModelId(modelId: string): boolean {
  return modelId
    .split("/")
    .every(
      (segment) =>
        segment !== "." &&
        segment !== ".." &&
        MODEL_ID_SEGMENT_PATTERN.test(segment)
    );
}

/**
 * GET /api/monitor/lab/model-endpoints?model=...
 *
 * Proxies OpenRouter's "endpoints for a model" listing and returns the
 * upstream JSON unchanged. A missing or malformed `model` yields a 400
 * response, and an upstream error status is passed through to the caller.
 *
 * NOTE(review): this handler performs no authentication or admin check. If it
 * is not meant to be public, add such checks here.
 *
 * @param request - Incoming request carrying the `model` query parameter.
 * @returns The upstream JSON body, or a JSON `{ error }` response.
 */
export async function GET(request: NextRequest) {
  const modelId = request.nextUrl.searchParams.get("model");

  if (!modelId) {
    return NextResponse.json({ error: "model parameter required" }, { status: 400 });
  }

  // The value comes straight from the query string, so it is validated before
  // it becomes part of an outgoing URL.
  if (!isSafeModelId(modelId)) {
    return NextResponse.json({ error: "invalid model parameter" }, { status: 400 });
  }

  try {
    // Don't encode the full modelId - OpenRouter expects the / as part of the path
    // e.g., /models/z-ai/glm-4.7/endpoints not /models/z-ai%2Fglm-4.7/endpoints
    const response = await fetch(
      `https://openrouter.ai/api/v1/models/${modelId}/endpoints`
    );

    if (!response.ok) {
      return NextResponse.json(
        { error: `OpenRouter API error: ${response.status}` },
        { status: response.status }
      );
    }

    const data = await response.json();
    return NextResponse.json(data);
  } catch (error) {
    console.error("Failed to fetch model endpoints:", error);
    return NextResponse.json(
      { error: "Failed to fetch model endpoints" },
      { status: 500 }
    );
  }
}
/**
 * PUT /api/monitor/lab/profiles/[id]
 * Update a profile
 *
 * Only the fields present in the JSON body (`name`, `description`, `config`,
 * `isDefault`) are changed. The caller must be authenticated and pass the
 * admin check.
 *
 * Responses:
 * - 400 when `name` is supplied but is not a non-empty string, when
 *   `isDefault` is supplied but is not a boolean, or when another profile of
 *   the same agent already uses the requested name
 * - 404 when no profile has this id
 * - `{ profile }` with the updated record on success
 * - a server-error response when anything throws (the error is logged first)
 *
 * @param request - Incoming request whose JSON body holds the changes.
 * @param params - Route params promise resolving to the profile `id`.
 */
export async function PUT(
  request: NextRequest,
  { params }: RouteIdParams
) {
  const userId = await authenticateRequest(request);
  if (!userId) return commonErrors.unauthorized();

  const adminCheck = await isAdmin();
  if (!adminCheck) return commonErrors.forbidden();

  const { id } = await params;

  try {
    const body = await request.json();
    const { name, description, config, isDefault } = body;

    // The body is untyped JSON, so the fields used in conditions below are
    // checked up front.
    if (name !== undefined && (typeof name !== "string" || name.trim() === "")) {
      return NextResponse.json(
        { error: "name must be a non-empty string" },
        { status: 400 }
      );
    }
    if (isDefault !== undefined && typeof isDefault !== "boolean") {
      return NextResponse.json(
        { error: "isDefault must be a boolean" },
        { status: 400 }
      );
    }

    // Check profile exists
    const existing = await prisma.fallacyCheckerProfile.findUnique({
      where: { id },
    });

    if (!existing) {
      return NextResponse.json({ error: "Profile not found" }, { status: 404 });
    }

    // Check for duplicate name (excluding current profile)
    if (name !== undefined && name !== existing.name) {
      const duplicate = await prisma.fallacyCheckerProfile.findFirst({
        where: {
          agentId: existing.agentId,
          name,
          id: { not: id },
        },
      });

      if (duplicate) {
        return NextResponse.json(
          { error: "A profile with this name already exists" },
          { status: 400 }
        );
      }
    }

    // Built lazily so the same query can run alone or inside a transaction.
    const updateProfile = () =>
      prisma.fallacyCheckerProfile.update({
        where: { id },
        data: {
          ...(name !== undefined && { name }),
          ...(description !== undefined && { description }),
          ...(config !== undefined && { config }),
          ...(isDefault !== undefined && { isDefault }),
        },
      });

    // When this profile becomes the default, clearing the previous default and
    // applying the update must succeed or fail together. Otherwise a failed
    // update would leave the agent without any default profile.
    const becomingDefault = isDefault === true && !existing.isDefault;
    const profile = becomingDefault
      ? (
          await prisma.$transaction([
            prisma.fallacyCheckerProfile.updateMany({
              where: { agentId: existing.agentId, isDefault: true, id: { not: id } },
              data: { isDefault: false },
            }),
            updateProfile(),
          ])
        )[1]
      : await updateProfile();

    logger.info("Profile updated", { profileId: id });

    return NextResponse.json({ profile });
  } catch (error) {
    logger.error("Error updating profile:", error);
    return commonErrors.serverError("Failed to update profile");
  }
}
/**
 * POST /api/monitor/lab/profiles
 * Create a new profile
 *
 * Reads `name`, `description`, `agentId`, `config` and `isDefault` from the
 * JSON body. The caller must be authenticated and pass the admin check.
 *
 * Responses:
 * - 400 when `name` or `agentId` is missing or not a string, when `isDefault`
 *   is supplied but is not a boolean, or when the agent already has a profile
 *   with this name
 * - `{ profile }` with the created record on success
 * - a server-error response when anything throws (the error is logged first)
 *
 * @param request - Incoming request whose JSON body describes the profile.
 */
export async function POST(request: NextRequest) {
  const userId = await authenticateRequest(request);
  if (!userId) return commonErrors.unauthorized();

  const adminCheck = await isAdmin();
  if (!adminCheck) return commonErrors.forbidden();

  try {
    const body = await request.json();
    const { name, description, agentId, config, isDefault } = body;

    // The body is untyped JSON: require real, non-empty strings for the two
    // mandatory fields.
    if (!name || !agentId || typeof name !== "string" || typeof agentId !== "string") {
      return NextResponse.json(
        { error: "name and agentId are required" },
        { status: 400 }
      );
    }
    if (isDefault != null && typeof isDefault !== "boolean") {
      return NextResponse.json(
        { error: "isDefault must be a boolean" },
        { status: 400 }
      );
    }

    // Check for duplicate name
    const existing = await prisma.fallacyCheckerProfile.findFirst({
      where: { agentId, name },
    });

    if (existing) {
      return NextResponse.json(
        { error: "A profile with this name already exists" },
        { status: 400 }
      );
    }

    // Built lazily so the same query can run alone or inside a transaction.
    const createProfile = () =>
      prisma.fallacyCheckerProfile.create({
        data: {
          name,
          description: description ?? null,
          agentId,
          config: config ?? getDefaultConfig(),
          isDefault: isDefault ?? false,
        },
      });

    // When the new profile is the default, clearing the previous default and
    // creating the profile must succeed or fail together. Otherwise a failed
    // create would leave the agent without any default profile.
    const profile = isDefault
      ? (
          await prisma.$transaction([
            prisma.fallacyCheckerProfile.updateMany({
              where: { agentId, isDefault: true },
              data: { isDefault: false },
            }),
            createProfile(),
          ])
        )[1]
      : await createProfile();

    logger.info("Profile created", { profileId: profile.id, name, agentId });

    return NextResponse.json({ profile });
  } catch (error) {
    logger.error("Error creating profile:", error);
    return commonErrors.serverError("Failed to create profile");
  }
}

/**
 * Default profile configuration - matches the real fallacy checker defaults
 * Uses the NEW filterChain array format (not the old { filters: [...] } format)
 *
 * A fresh object is built on every call, so callers may store or mutate the
 * result without affecting later calls.
 *
 * @returns The config stored on a profile created without an explicit `config`.
 */
function getDefaultConfig() {
  return {
    version: 1,
    models: {
      extractors: [
        { model: "claude-sonnet-4-5-20250929", temperature: 0, thinking: false },
        { model: "google/gemini-3-flash-preview", temperature: "default", thinking: true },
        { model: "google/gemini-2.5-flash", temperature: "default", thinking: true },
      ],
      judge: {
        model: "claude-sonnet-4-5-20250929",
        enabled: false,
      },
    },
    thresholds: {
      minSeverityThreshold: 60,
      maxIssues: 15,
      dedupThreshold: 0.7,
      maxIssuesToProcess: 25,
    },
    filterChain: [
      {
        id: "default-supported-elsewhere",
        type: "supported-elsewhere",
        enabled: true,
        model: "claude-sonnet-4-5-20250929",
        temperature: 0.1,
      },
    ],
  };
}
/**
 * GET /api/monitor/lab/prompts
 *
 * Serves the built-in default prompts (extractor system and user prompts,
 * judge system prompt, supported-elsewhere filter system prompt) as one JSON
 * object. The profile editor UI shows them as placeholders.
 *
 * @returns A JSON response with the four prompt strings.
 */
export function GET() {
  const defaultPrompts = {
    extractorSystemPrompt: DEFAULT_EXTRACTOR_SYSTEM_PROMPT,
    extractorUserPrompt: DEFAULT_EXTRACTOR_USER_PROMPT,
    judgeSystemPrompt: DEFAULT_JUDGE_SYSTEM_PROMPT,
    filterSystemPrompt: DEFAULT_SUPPORTED_ELSEWHERE_SYSTEM_PROMPT,
  };

  return NextResponse.json(defaultPrompts);
}
Save comparison results + * 4. Update run status + */ +export async function POST( + request: NextRequest, + { params }: RouteIdParams +) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + const { id: runId } = await params; + + try { + // Get the run + const run = await prisma.validationRun.findUnique({ + where: { id: runId }, + include: { + baseline: { + select: { id: true, agentId: true }, + }, + }, + }); + + if (!run) { + return NextResponse.json({ error: "Run not found" }, { status: 404 }); + } + + if (run.status === "completed") { + return NextResponse.json({ error: "Run already finalized" }, { status: 400 }); + } + + // Get baseline snapshots + const baselineSnapshots = await metaEvaluationRepository.getBaselineSnapshots(run.baselineId); + + if (baselineSnapshots.length === 0) { + await metaEvaluationRepository.updateValidationRunStatus(runId, "failed", "Baseline has no snapshots"); + return NextResponse.json({ error: "Baseline has no snapshots" }, { status: 400 }); + } + + // Get the document IDs + const documentIds = [...new Set(baselineSnapshots.map((s) => s.documentId))]; + + // Get the latest evaluation versions for these documents + const newSnapshots = await metaEvaluationRepository.getEvaluationSnapshots( + documentIds, + run.baseline.agentId + ); + + // Compare and save results + let unchangedCount = 0; + let changedCount = 0; + + for (const baselineSnapshot of baselineSnapshots) { + const newSnapshot = newSnapshots.find( + (s) => s.documentId === baselineSnapshot.documentId + ); + + if (newSnapshot) { + // Compare comments + const comparison = compareSnapshots( + toEvaluationSnapshot(baselineSnapshot), + toEvaluationSnapshot(newSnapshot) + ); + + // Get baseline snapshot record ID + const baselineSnapshotRecord = await metaEvaluationRepository.getBaselineSnapshotByDocument( + run.baselineId, + 
baselineSnapshot.documentId + ); + + if (baselineSnapshotRecord) { + const status = + comparison.newComments.length === 0 && comparison.lostComments.length === 0 + ? "unchanged" + : "changed"; + + if (status === "unchanged") unchangedCount++; + else changedCount++; + + // Get pipeline telemetry from new snapshot + const telemetry = newSnapshot.pipelineTelemetry as EvaluationSnapshot["pipelineTelemetry"]; + const finalCounts = telemetry?.finalCounts; + + // Get full telemetry record for stages + const fullTelemetry = newSnapshot.pipelineTelemetry as { + stages?: Array<{ + stageName: string; + durationMs: number; + inputCount: number; + outputCount: number; + model?: string; + costUsd?: number; + }>; + totalDurationMs?: number; + } & EvaluationSnapshot["pipelineTelemetry"]; + + await metaEvaluationRepository.addValidationRunSnapshot({ + runId, + baselineSnapshotId: baselineSnapshotRecord.id, + newEvaluationId: newSnapshot.evaluationVersionId, + status, + keptCount: comparison.matchedComments.length, + newCount: comparison.newComments.length, + lostCount: comparison.lostComments.length, + comparisonData: { + matchedComments: comparison.matchedComments, + newComments: comparison.newComments, + lostComments: comparison.lostComments, + filteredItems: telemetry?.filteredItems, + extractionPhase: telemetry?.extractionPhase, + stages: fullTelemetry.stages, + totalDurationMs: fullTelemetry.totalDurationMs, + pipelineCounts: finalCounts + ? { + issuesAfterDedup: finalCounts.issuesAfterDedup ?? 0, + issuesAfterFiltering: finalCounts.issuesAfterFiltering ?? 0, + commentsGenerated: finalCounts.commentsGenerated ?? 0, + commentsKept: finalCounts.commentsKept ?? 
0, + } + : undefined, + }, + }); + } + } + } + + // Update run status + const summary = `${unchangedCount} unchanged, ${changedCount} changed`; + await metaEvaluationRepository.updateValidationRunStatus(runId, "completed", summary); + + logger.info("Validation run finalized", { + runId, + unchangedCount, + changedCount, + }); + + return NextResponse.json({ + success: true, + summary, + unchangedCount, + changedCount, + }); + } catch (error) { + logger.error("Error finalizing validation run:", error); + + // Mark run as failed + try { + await metaEvaluationRepository.updateValidationRunStatus( + runId, + "failed", + error instanceof Error ? error.message : "Unknown error" + ); + } catch { + // Ignore secondary error + } + + return commonErrors.serverError("Failed to finalize validation run"); + } +} + +// Helper to convert snapshot format +function toEvaluationSnapshot(snapshot: { + evaluationVersionId: string; + documentId: string; + comments: CommentData[]; + pipelineTelemetry?: unknown; +}): EvaluationSnapshot { + return { + evaluationVersionId: snapshot.evaluationVersionId, + documentId: snapshot.documentId, + comments: snapshot.comments, + pipelineTelemetry: snapshot.pipelineTelemetry as EvaluationSnapshot["pipelineTelemetry"], + }; +} + +// Simple comment comparison +function compareSnapshots(baseline: EvaluationSnapshot, current: EvaluationSnapshot) { + const matchedComments: Array<{ + baselineComment: CommentData; + currentComment: CommentData; + matchConfidence: number; + status: string; + }> = []; + const newComments: CommentData[] = []; + const lostComments: CommentData[] = []; + + const usedCurrentIndices = new Set(); + + // Find matches based on quoted text similarity + for (const baselineComment of baseline.comments) { + let bestMatch: { index: number; score: number } | null = null; + + for (let i = 0; i < current.comments.length; i++) { + if (usedCurrentIndices.has(i)) continue; + + const currentComment = current.comments[i]; + const score = 
calculateSimilarity(baselineComment.quotedText, currentComment.quotedText); + + if (score > 0.6 && (!bestMatch || score > bestMatch.score)) { + bestMatch = { index: i, score }; + } + } + + if (bestMatch) { + usedCurrentIndices.add(bestMatch.index); + matchedComments.push({ + baselineComment, + currentComment: current.comments[bestMatch.index], + matchConfidence: bestMatch.score, + status: "matched", + }); + } else { + lostComments.push(baselineComment); + } + } + + // Find new comments (not matched to any baseline) + for (let i = 0; i < current.comments.length; i++) { + if (!usedCurrentIndices.has(i)) { + newComments.push(current.comments[i]); + } + } + + return { matchedComments, newComments, lostComments }; +} + +// Simple text similarity (Jaccard on words) +function calculateSimilarity(a: string, b: string): number { + const wordsA = new Set(a.toLowerCase().split(/\s+/)); + const wordsB = new Set(b.toLowerCase().split(/\s+/)); + + const intersection = new Set([...wordsA].filter((x) => wordsB.has(x))); + const union = new Set([...wordsA, ...wordsB]); + + return intersection.size / union.size; +} diff --git a/apps/web/src/app/api/monitor/lab/runs/[id]/route.ts b/apps/web/src/app/api/monitor/lab/runs/[id]/route.ts new file mode 100644 index 000000000..f539e8474 --- /dev/null +++ b/apps/web/src/app/api/monitor/lab/runs/[id]/route.ts @@ -0,0 +1,52 @@ +import { NextRequest, NextResponse } from "next/server"; +import { logger } from "@/infrastructure/logging/logger"; +import { authenticateRequest } from "@/infrastructure/auth/auth-helpers"; +import { commonErrors } from "@/infrastructure/http/api-response-helpers"; +import { isAdmin } from "@/infrastructure/auth/auth"; +import { metaEvaluationRepository } from "@roast/db"; +import type { RouteIdParams } from "../../types"; + +export async function GET( + request: NextRequest, + { params }: RouteIdParams +) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + 
const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + const { id } = await params; + + try { + const run = await metaEvaluationRepository.getValidationRunDetail(id); + if (!run) { + return NextResponse.json({ error: "Run not found" }, { status: 404 }); + } + return NextResponse.json({ run }); + } catch (error) { + logger.error("Error fetching run detail:", error); + return commonErrors.serverError("Failed to fetch run detail"); + } +} + +export async function DELETE( + request: NextRequest, + { params }: { params: Promise<{ id: string }> } +) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + const { id } = await params; + + try { + await metaEvaluationRepository.deleteValidationRun(id); + return NextResponse.json({ success: true }); + } catch (error) { + logger.error("Error deleting run:", error); + return commonErrors.serverError("Failed to delete run"); + } +} diff --git a/apps/web/src/app/api/monitor/lab/runs/route.ts b/apps/web/src/app/api/monitor/lab/runs/route.ts new file mode 100644 index 000000000..e16bcaf24 --- /dev/null +++ b/apps/web/src/app/api/monitor/lab/runs/route.ts @@ -0,0 +1,59 @@ +import { NextRequest, NextResponse } from "next/server"; +import { logger } from "@/infrastructure/logging/logger"; +import { authenticateRequest } from "@/infrastructure/auth/auth-helpers"; +import { commonErrors } from "@/infrastructure/http/api-response-helpers"; +import { isAdmin } from "@/infrastructure/auth/auth"; +import { metaEvaluationRepository } from "@roast/db"; + +export async function GET(request: NextRequest) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + const baselineId = 
request.nextUrl.searchParams.get("baselineId"); + if (!baselineId) { + return NextResponse.json({ error: "baselineId is required" }, { status: 400 }); + } + + try { + const runs = await metaEvaluationRepository.getValidationRuns(baselineId); + return NextResponse.json({ runs }); + } catch (error) { + logger.error("Error fetching runs:", error); + return commonErrors.serverError("Failed to fetch runs"); + } +} + +export async function POST(request: NextRequest) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + try { + const body = await request.json(); + const { baselineId, name } = body; + + if (!baselineId) { + return NextResponse.json({ error: "baselineId is required" }, { status: 400 }); + } + + // Create the run record + const run = await metaEvaluationRepository.createValidationRun({ + baselineId, + name, + }); + + // Note: The actual evaluation execution would be triggered separately + // (e.g., via a job queue). For now, we just create the run record. + // The CLI handles the actual pipeline execution. 
+ + return NextResponse.json({ run }); + } catch (error) { + logger.error("Error creating run:", error); + return commonErrors.serverError("Failed to create run"); + } +} diff --git a/apps/web/src/app/api/monitor/lab/runs/start/route.ts b/apps/web/src/app/api/monitor/lab/runs/start/route.ts new file mode 100644 index 000000000..deb0e6b6c --- /dev/null +++ b/apps/web/src/app/api/monitor/lab/runs/start/route.ts @@ -0,0 +1,116 @@ +import { NextRequest, NextResponse } from "next/server"; +import { logger } from "@/infrastructure/logging/logger"; +import { authenticateRequest } from "@/infrastructure/auth/auth-helpers"; +import { commonErrors } from "@/infrastructure/http/api-response-helpers"; +import { isAdmin } from "@/infrastructure/auth/auth"; +import { prisma, metaEvaluationRepository } from "@roast/db"; +import { getServices } from "@/application/services/ServiceFactory"; + +/** + * Start a validation run: + * 1. Create ValidationRun record + * 2. Get documents from baseline + * 3. Create batch jobs to re-evaluate each document + * 4. 
Return run ID and job IDs for polling + */ +export async function POST(request: NextRequest) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + try { + const body = await request.json(); + const { baselineId, name, profileId } = body; + + if (!baselineId) { + return NextResponse.json({ error: "baselineId is required" }, { status: 400 }); + } + + // Get baseline info + const baseline = await prisma.validationBaseline.findUnique({ + where: { id: baselineId }, + select: { id: true, name: true, agentId: true }, + }); + + if (!baseline) { + return NextResponse.json({ error: "Baseline not found" }, { status: 404 }); + } + + // Get document IDs from baseline + const documentIds = await metaEvaluationRepository.getBaselineDocumentIds(baselineId); + + if (documentIds.length === 0) { + return NextResponse.json({ error: "Baseline has no documents" }, { status: 400 }); + } + + // Create the validation run + const run = await metaEvaluationRepository.createValidationRun({ + baselineId, + name: name || `Run ${new Date().toLocaleString()}`, + profileId: profileId || undefined, + }); + + // Create batch for the jobs + const batch = await prisma.agentEvalBatch.create({ + data: { + name: `Validation run ${run.id.slice(0, 8)}`, + agentId: baseline.agentId, + requestedDocumentIds: documentIds, + userId, + }, + }); + + // Create evaluations and jobs for each document + const jobIds: string[] = []; + const { jobService } = getServices(); + + for (const documentId of documentIds) { + // Check if evaluation exists + let evaluation = await prisma.evaluation.findFirst({ + where: { + documentId, + agentId: baseline.agentId, + }, + }); + + // Create evaluation if it doesn't exist + if (!evaluation) { + evaluation = await prisma.evaluation.create({ + data: { + documentId, + agentId: baseline.agentId, + }, + }); + } + + // Create job with profile 
ID for plugin configuration + const job = await jobService.createJob(evaluation.id, batch.id, profileId); + jobIds.push(job.id); + } + + logger.info("Validation run started", { + runId: run.id, + baselineId, + profileId: profileId || null, + documentCount: documentIds.length, + jobCount: jobIds.length, + }); + + return NextResponse.json({ + run: { + id: run.id, + status: "running", + }, + batch: { + id: batch.id, + }, + jobIds, + documentCount: documentIds.length, + }); + } catch (error) { + logger.error("Error starting validation run:", error); + return commonErrors.serverError("Failed to start validation run"); + } +} diff --git a/apps/web/src/app/api/monitor/lab/snapshots/[id]/route.ts b/apps/web/src/app/api/monitor/lab/snapshots/[id]/route.ts new file mode 100644 index 000000000..ed5775923 --- /dev/null +++ b/apps/web/src/app/api/monitor/lab/snapshots/[id]/route.ts @@ -0,0 +1,101 @@ +import { NextRequest, NextResponse } from "next/server"; +import { logger } from "@/infrastructure/logging/logger"; +import { authenticateRequest } from "@/infrastructure/auth/auth-helpers"; +import { commonErrors } from "@/infrastructure/http/api-response-helpers"; +import { isAdmin } from "@/infrastructure/auth/auth"; +import { prisma } from "@roast/db"; +import type { RouteIdParams } from "../../types"; + +export async function GET( + request: NextRequest, + { params }: RouteIdParams +) { + const userId = await authenticateRequest(request); + if (!userId) return commonErrors.unauthorized(); + + const adminCheck = await isAdmin(); + if (!adminCheck) return commonErrors.forbidden(); + + const { id } = await params; + + try { + // Get the run snapshot with full comparison data + const snapshot = await prisma.validationRunSnapshot.findUnique({ + where: { id }, + include: { + baselineSnapshot: { + include: { + evaluationVersion: { + include: { + evaluation: { + include: { + document: { + include: { + versions: { + orderBy: { version: "desc" }, + take: 1, + select: { title: true }, + 
}, + }, + }, + }, + }, + comments: { + include: { highlight: true }, + }, + }, + }, + }, + }, + newEvaluation: { + include: { + comments: { + include: { highlight: true }, + }, + }, + }, + }, + }); + + if (!snapshot) { + return NextResponse.json({ error: "Snapshot not found" }, { status: 404 }); + } + + // Format baseline comments + const baselineComments = snapshot.baselineSnapshot.evaluationVersion.comments.map((c) => ({ + id: c.id, + quotedText: c.highlight.quotedText, + header: c.header, + description: c.description, + importance: c.importance, + })); + + // Format current comments + const currentComments = snapshot.newEvaluation.comments.map((c) => ({ + id: c.id, + quotedText: c.highlight.quotedText, + header: c.header, + description: c.description, + importance: c.importance, + })); + + return NextResponse.json({ + snapshot: { + id: snapshot.id, + status: snapshot.status, + keptCount: snapshot.keptCount, + newCount: snapshot.newCount, + lostCount: snapshot.lostCount, + documentTitle: + snapshot.baselineSnapshot.evaluationVersion.evaluation.document.versions[0]?.title || + "Unknown", + comparisonData: snapshot.comparisonData, + baselineComments, + currentComments, + }, + }); + } catch (error) { + logger.error("Error fetching snapshot:", error); + return commonErrors.serverError("Failed to fetch snapshot"); + } +} diff --git a/apps/web/src/app/api/monitor/lab/types.ts b/apps/web/src/app/api/monitor/lab/types.ts new file mode 100644 index 000000000..f2904604a --- /dev/null +++ b/apps/web/src/app/api/monitor/lab/types.ts @@ -0,0 +1,10 @@ +/** + * Shared types for Lab API routes + */ + +/** + * Next.js 15 dynamic route params for routes with [id] segment + */ +export interface RouteIdParams { + params: Promise<{ id: string }>; +} diff --git a/apps/web/src/app/monitor/client-layout.tsx b/apps/web/src/app/monitor/client-layout.tsx index be6fd5bd5..162019273 100644 --- a/apps/web/src/app/monitor/client-layout.tsx +++ b/apps/web/src/app/monitor/client-layout.tsx @@ 
-55,6 +55,12 @@ export default function MonitorLayout({ children }: MonitorLayoutProps) { > Docs + + Lab + void; + onDelete: () => void; +} + +export function BaselineCard({ baseline, isSelected, onSelect, onDelete }: BaselineCardProps) { + return ( +
+
+
+

{baseline.name}

+ {baseline.description && ( +

{baseline.description}

+ )} +
+ {baseline.snapshotCount} documents + {formatDate(baseline.createdAt)} + {baseline.commitHash && ( + {baseline.commitHash.slice(0, 7)} + )} +
+
+ +
+
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/baselines/BaselineList.tsx b/apps/web/src/app/monitor/lab/components/baselines/BaselineList.tsx new file mode 100644 index 000000000..cfd437782 --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/baselines/BaselineList.tsx @@ -0,0 +1,27 @@ +"use client"; + +import { BaselineCard } from "./BaselineCard"; +import type { Baseline } from "../../types"; + +interface BaselineListProps { + baselines: Baseline[]; + selectedId: string | null; + onSelect: (baseline: Baseline) => void; + onDelete: (id: string) => void; +} + +export function BaselineList({ baselines, selectedId, onSelect, onDelete }: BaselineListProps) { + return ( +
+ {baselines.map((baseline) => ( + onSelect(baseline)} + onDelete={() => onDelete(baseline.id)} + /> + ))} +
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/baselines/CreateBaselineModal.tsx b/apps/web/src/app/monitor/lab/components/baselines/CreateBaselineModal.tsx new file mode 100644 index 000000000..ea27c66c7 --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/baselines/CreateBaselineModal.tsx @@ -0,0 +1,359 @@ +"use client"; + +import { useState, useEffect, useCallback } from "react"; +import { XMarkIcon, MagnifyingGlassIcon, ChevronDownIcon, ChevronRightIcon } from "@heroicons/react/24/outline"; +import type { CorpusDocument, EvaluationVersionSummary } from "../../types"; +import { truncate } from "../../utils/formatters"; + +interface CreateBaselineModalProps { + agentId: string; + onClose: () => void; + onCreated: () => void; +} + +function getDefaultName(): string { + const now = new Date(); + const date = now.toISOString().split("T")[0]; + return `Baseline ${date}`; +} + +function formatDate(dateStr: string): string { + return new Date(dateStr).toLocaleString(); +} + +export function CreateBaselineModal({ agentId, onClose, onCreated }: CreateBaselineModalProps) { + const [name, setName] = useState(getDefaultName); + const [description, setDescription] = useState(""); + const [searchQuery, setSearchQuery] = useState(""); + const [documents, setDocuments] = useState([]); + const [loading, setLoading] = useState(true); + const [creating, setCreating] = useState(false); + + // Selected version per document: Map + const [selectedVersions, setSelectedVersions] = useState>(new Map()); + + // Expanded documents for viewing versions + const [expandedDocId, setExpandedDocId] = useState(null); + const [versions, setVersions] = useState([]); + const [loadingVersions, setLoadingVersions] = useState(false); + + // Cache versions per document to avoid refetching + const [versionsCache, setVersionsCache] = useState>(new Map()); + + const fetchDocuments = useCallback(async (filter?: string) => { + setLoading(true); + try { + const params = new 
URLSearchParams({ agentId }); + if (filter) params.set("filter", filter); + const res = await fetch(`/api/monitor/lab/corpus?${params}`); + if (res.ok) { + const data = await res.json(); + setDocuments(data.documents); + } + } finally { + setLoading(false); + } + }, [agentId]); + + const fetchVersions = useCallback(async (documentId: string): Promise => { + // Check cache first + const cached = versionsCache.get(documentId); + if (cached) return cached; + + setLoadingVersions(true); + try { + const params = new URLSearchParams({ agentId, documentId }); + const res = await fetch(`/api/monitor/lab/corpus/versions?${params}`); + if (res.ok) { + const data = await res.json(); + const fetchedVersions = data.versions as EvaluationVersionSummary[]; + setVersionsCache(prev => new Map(prev).set(documentId, fetchedVersions)); + return fetchedVersions; + } + } finally { + setLoadingVersions(false); + } + return []; + }, [agentId, versionsCache]); + + useEffect(() => { + void fetchDocuments(); + }, [fetchDocuments]); + + const handleSearch = () => { + void fetchDocuments(searchQuery || undefined); + }; + + const toggleDocument = async (docId: string) => { + const newSelected = new Map(selectedVersions); + if (newSelected.has(docId)) { + // Deselect document + newSelected.delete(docId); + } else { + // Select document - auto-select latest version + const docVersions = versionsCache.get(docId) || await fetchVersions(docId); + if (docVersions.length > 0) { + newSelected.set(docId, docVersions[0].id); // Latest version (ordered desc) + } + } + setSelectedVersions(newSelected); + }; + + const selectVersion = (docId: string, versionId: string) => { + const newSelected = new Map(selectedVersions); + newSelected.set(docId, versionId); + setSelectedVersions(newSelected); + }; + + const toggleExpand = async (docId: string) => { + if (expandedDocId === docId) { + setExpandedDocId(null); + setVersions([]); + } else { + setExpandedDocId(docId); + const docVersions = await 
fetchVersions(docId); + setVersions(docVersions); + } + }; + + const handleSelectAll = async () => { + const newSelected = new Map(); + for (const doc of documents) { + const docVersions = versionsCache.get(doc.documentId) || await fetchVersions(doc.documentId); + if (docVersions.length > 0) { + newSelected.set(doc.documentId, docVersions[0].id); + } + } + setSelectedVersions(newSelected); + }; + + const handleSelectNone = () => { + setSelectedVersions(new Map()); + }; + + const handleCreate = async () => { + if (!name.trim() || selectedVersions.size === 0) return; + setCreating(true); + try { + const res = await fetch("/api/monitor/lab/baselines", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + agentId, + name: name.trim(), + description: description.trim() || undefined, + evaluationVersionIds: Array.from(selectedVersions.values()), + }), + }); + if (res.ok) { + onCreated(); + } + } finally { + setCreating(false); + } + }; + + return ( +
+
+ {/* Header */} +
+

Create Validation Baseline

+ +
+ + {/* Content */} +
+ {/* Name & Description */} +
+
+ + setName(e.target.value)} + placeholder="e.g., Pre-refactor baseline" + className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + /> +
+
+ + setDescription(e.target.value)} + placeholder="e.g., Baseline before filter changes" + className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + /> +
+
+ + {/* Document Selection */} +
+
+ +
+ + +
+
+ + {/* Search */} +
+
+ + setSearchQuery(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && handleSearch()} + placeholder="Search documents..." + className="w-full pl-9 pr-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 text-sm" + /> +
+ +
+ +

+ Expand a document to select a specific version. By default, the latest version is used. +

+ + {/* Document List */} +
+ {loading ? ( +
Loading documents...
+ ) : documents.length === 0 ? ( +
No documents found
+ ) : ( +
+ {documents.map((doc) => { + const isSelected = selectedVersions.has(doc.documentId); + const selectedVersionId = selectedVersions.get(doc.documentId); + + return ( +
+
+ {/* Expand button */} + + + {/* Checkbox */} + void toggleDocument(doc.documentId)} + className="h-4 w-4 text-blue-600 rounded border-gray-300" + /> + + {/* Document info */} +
+

+ {truncate(doc.title, 60)} +

+

+ {doc.evaluationCount} evaluations + {isSelected && selectedVersionId && ( + + (version selected) + + )} +

+
+
+ + {/* Expanded versions */} + {expandedDocId === doc.documentId && ( +
+ {loadingVersions ? ( +

Loading versions...

+ ) : versions.length === 0 ? ( +

No versions found

+ ) : ( +
+

+ Select version ({versions.length} available): +

+ {versions.map((v, idx) => ( + + ))} +
+ )} +
+ )} +
+ ); + })} +
+ )} +
+
+
+ + {/* Footer */} +
+ + +
+
+
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/evaluations/AllEvaluationsList.tsx b/apps/web/src/app/monitor/lab/components/evaluations/AllEvaluationsList.tsx new file mode 100644 index 000000000..8bd39a159 --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/evaluations/AllEvaluationsList.tsx @@ -0,0 +1,200 @@ +"use client"; + +import { useState } from "react"; +import { + ChevronDownIcon, + ChevronRightIcon, + ArrowPathIcon, + DocumentTextIcon, + ClockIcon, + CurrencyDollarIcon, +} from "@heroicons/react/24/outline"; +import type { EvaluationWithTelemetry } from "../../hooks/useAllEvaluations"; +import { PipelineView } from "../snapshots/PipelineView"; +import { formatDuration, formatCost } from "../snapshots/pipelineUtils"; + +interface AllEvaluationsListProps { + evaluations: EvaluationWithTelemetry[]; + loading: boolean; + error: string | null; + onRefresh: () => void; +} + +export function AllEvaluationsList({ + evaluations, + loading, + error, + onRefresh, +}: AllEvaluationsListProps) { + const [expandedId, setExpandedId] = useState(null); + + const toggleExpanded = (id: string) => { + setExpandedId(expandedId === id ? null : id); + }; + + if (loading && evaluations.length === 0) { + return ( +
+ + Loading evaluations... +
+ ); + } + + if (error) { + return ( +
+

Error: {error}

+ +
+ ); + } + + if (evaluations.length === 0) { + return ( +
+ +

No evaluations with telemetry found.

+

Run some evaluations in the main UI to see them here.

+
+ ); + } + + return ( +
+ {/* Header with refresh button */} +
+ + {evaluations.length} recent evaluation{evaluations.length !== 1 ? "s" : ""} + + +
+ + {/* Evaluation list */} +
+ {evaluations.map((evaluation) => ( + toggleExpanded(evaluation.id)} + /> + ))} +
+
+ ); +} + +interface EvaluationRowProps { + evaluation: EvaluationWithTelemetry; + isExpanded: boolean; + onToggle: () => void; +} + +function EvaluationRow({ evaluation, isExpanded, onToggle }: EvaluationRowProps) { + const telemetry = evaluation.telemetry; + const hasTelemetry = telemetry && (telemetry.stages || telemetry.extractionPhase); + + // Format date + const date = new Date(evaluation.createdAt); + const formattedDate = date.toLocaleDateString("en-US", { + month: "short", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + }); + + return ( +
+ {/* Row header - always visible */} + + + {/* Expanded content - PipelineView */} + {isExpanded && hasTelemetry && ( +
+ +
+ )} +
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/history/RunDetail.tsx b/apps/web/src/app/monitor/lab/components/history/RunDetail.tsx new file mode 100644 index 000000000..807c23fa5 --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/history/RunDetail.tsx @@ -0,0 +1,131 @@ +"use client"; + +import { useState, useEffect } from "react"; +import { ChevronRightIcon, ChevronLeftIcon } from "@heroicons/react/24/outline"; +import { formatDate } from "../../utils/formatters"; +import type { ValidationRunDetail, RunSnapshot } from "../../types"; +import { SnapshotComparison } from "../snapshots/SnapshotComparison"; + +interface RunDetailProps { + runId: string; +} + +interface SnapshotRowProps { + snapshot: RunSnapshot; + onClick: () => void; +} + +export function RunDetail({ runId }: RunDetailProps) { + const [run, setRun] = useState(null); + const [loading, setLoading] = useState(true); + const [selectedSnapshot, setSelectedSnapshot] = useState(null); + + useEffect(() => { + const fetchRun = async () => { + setLoading(true); + try { + const res = await fetch(`/api/monitor/lab/runs/${runId}`); + if (res.ok) { + const data = await res.json(); + setRun(data.run); + } + } finally { + setLoading(false); + } + }; + void fetchRun(); + }, [runId]); + + if (loading) { + return
Loading run details...
; + } + + if (!run) { + return
Run not found
; + } + + if (selectedSnapshot) { + return ( +
+ + setSelectedSnapshot(null)} /> +
+ ); + } + + const changedSnapshots = run.snapshots.filter((s) => s.status === "changed"); + const unchangedSnapshots = run.snapshots.filter((s) => s.status === "unchanged"); + + return ( +
+ {/* Summary */} +
+ {formatDate(run.createdAt)} + {run.summary && {run.summary}} +
+ + {/* Changed First */} + {changedSnapshots.length > 0 && ( +
+

+ Changed ({changedSnapshots.length}) +

+
+ {changedSnapshots.map((snapshot) => ( + setSelectedSnapshot(snapshot)} + /> + ))} +
+
+ )} + + {/* Unchanged */} + {unchangedSnapshots.length > 0 && ( +
+

+ Unchanged ({unchangedSnapshots.length}) +

+
+ {unchangedSnapshots.map((snapshot) => ( + setSelectedSnapshot(snapshot)} + /> + ))} +
+
+ )} + + {run.snapshots.length === 0 && ( +
No snapshots in this run
+ )} +
+ ); +} + +function SnapshotRow({ snapshot, onClick }: SnapshotRowProps) { + return ( +
+
+

{snapshot.documentTitle}

+

+ {snapshot.keptCount} kept · {snapshot.newCount} new · {snapshot.lostCount} lost

+
+ +
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/profiles/ExtractorEditor.tsx b/apps/web/src/app/monitor/lab/components/profiles/ExtractorEditor.tsx new file mode 100644 index 000000000..a8fefffff --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/profiles/ExtractorEditor.tsx @@ -0,0 +1,80 @@ +"use client"; + +import { useState } from "react"; +import { PlusIcon } from "@heroicons/react/24/outline"; +import type { ExtractorConfig } from "../../types"; +import { useModels, type ModelInfo } from "../../hooks/useModels"; +import { ModelConfigurator } from "./ModelConfigurator"; +import { ModelSelector } from "./ModelSelector"; + +interface ExtractorEditorProps { + extractors: ExtractorConfig[]; + onChange: (extractors: ExtractorConfig[]) => void; + disabled?: boolean; +} + +export function ExtractorEditor({ extractors, onChange, disabled }: ExtractorEditorProps) { + const { models, loading: modelsLoading } = useModels(); + const [addingExtractor, setAddingExtractor] = useState(false); + + const updateExtractor = (index: number, updates: Partial) => { + const newExtractors = [...extractors]; + newExtractors[index] = { ...newExtractors[index], ...updates }; + onChange(newExtractors); + }; + + const removeExtractor = (index: number) => { + if (extractors.length <= 1) return; + onChange(extractors.filter((_, i) => i !== index)); + }; + + const addExtractor = (model: ModelInfo) => { + onChange([ + ...extractors, + { model: model.id, temperature: "default", thinking: false }, + ]); + setAddingExtractor(false); + }; + + return ( +
+ {extractors.map((ext, index) => ( + updateExtractor(index, updates)} + disabled={disabled} + label={index + 1} + colorTheme="blue" + showProvider={true} + showDelete={true} + onDelete={() => removeExtractor(index)} + deleteDisabled={extractors.length <= 1} + deleteDisabledReason="Cannot remove last extractor" + /> + ))} + + {/* Add Extractor Button / Model Selector */} + {!disabled && ( + addingExtractor ? ( +
+ setAddingExtractor(false)} + /> +
+ ) : ( + + ) + )} +
+ ); +} diff --git a/apps/web/src/app/monitor/lab/components/profiles/FilterChainEditor.tsx b/apps/web/src/app/monitor/lab/components/profiles/FilterChainEditor.tsx new file mode 100644 index 000000000..b1691c060 --- /dev/null +++ b/apps/web/src/app/monitor/lab/components/profiles/FilterChainEditor.tsx @@ -0,0 +1,491 @@ +"use client"; + +import { useState } from "react"; +import { + ChevronUpIcon, + ChevronDownIcon, + TrashIcon, + PlusIcon, + ChevronRightIcon, +} from "@heroicons/react/24/outline"; +import type { + FilterChainItem, + SupportedElsewhereFilterConfig, + PrincipleOfCharityFilterConfig, + SeverityFilterConfig, + ConfidenceFilterConfig, +} from "../../types"; +import { AVAILABLE_FILTER_TYPES } from "../../types"; +import { ModelConfigurator } from "./ModelConfigurator"; +import { getModelDisplayName } from "./ModelSelector"; + +interface FilterChainEditorProps { + filters: FilterChainItem[]; + onChange: (filters: FilterChainItem[]) => void; + disabled?: boolean; + defaultFilterPrompt?: string; +} + +export function FilterChainEditor({ + filters, + onChange, + disabled, + defaultFilterPrompt, +}: FilterChainEditorProps) { + const [showAddMenu, setShowAddMenu] = useState(false); + + const moveFilter = (index: number, direction: "up" | "down") => { + if (disabled) return; + const newFilters = [...filters]; + const newIndex = direction === "up" ? 
index - 1 : index + 1; + if (newIndex < 0 || newIndex >= filters.length) return; + [newFilters[index], newFilters[newIndex]] = [newFilters[newIndex], newFilters[index]]; + onChange(newFilters); + }; + + const removeFilter = (index: number) => { + if (disabled) return; + onChange(filters.filter((_, i) => i !== index)); + }; + + const updateFilter = (index: number, updates: Partial) => { + if (disabled) return; + const newFilters = [...filters]; + newFilters[index] = { ...newFilters[index], ...updates } as FilterChainItem; + onChange(newFilters); + }; + + const toggleFilter = (index: number) => { + updateFilter(index, { enabled: !filters[index].enabled }); + }; + + const addFilter = (type: FilterChainItem["type"]) => { + if (disabled) return; + const id = `filter-${Date.now()}`; + let newFilter: FilterChainItem; + + switch (type) { + case "principle-of-charity": + newFilter = { + id, + type: "principle-of-charity", + enabled: true, + model: "claude-sonnet-4-5-20250929", + temperature: 0.2, + }; + break; + case "supported-elsewhere": + newFilter = { + id, + type: "supported-elsewhere", + enabled: true, + model: "claude-sonnet-4-5-20250929", + temperature: 0.1, + }; + break; + case "severity": + newFilter = { + id, + type: "severity", + enabled: true, + minSeverity: 50, + }; + break; + case "confidence": + newFilter = { + id, + type: "confidence", + enabled: true, + minConfidence: 50, + }; + break; + default: { + const _exhaustiveCheck: never = type; + throw new Error(`Unknown filter type: ${_exhaustiveCheck}`); + } + } + + onChange([...filters, newFilter]); + setShowAddMenu(false); + }; + + return ( +
+
+
+ +

+ Filters run in sequence. Each filter can remove issues from the pipeline. +

+
+
+ + {/* Filter List */} +
+ {filters.length === 0 ? ( +
+ No filters configured. Add a filter to remove false positives. +
+ ) : ( + filters.map((filter, index) => ( + moveFilter(index, dir)} + onRemove={() => removeFilter(index)} + onUpdate={(updates) => updateFilter(index, updates)} + onToggle={() => toggleFilter(index)} + /> + )) + )} +
+ + {/* Add Filter Button */} + {!disabled && ( +
+ + + {showAddMenu && ( +
+
+ Available Filters +
+ {AVAILABLE_FILTER_TYPES.map((filterType) => ( + + ))} +
+ +
+
+ )} +
+ )} +
+ ); +} + +interface FilterItemEditorProps { + filter: FilterChainItem; + index: number; + totalFilters: number; + disabled?: boolean; + defaultFilterPrompt?: string; + onMove: (direction: "up" | "down") => void; + onRemove: () => void; + onUpdate: (updates: Partial) => void; + onToggle: () => void; +} + +function FilterItemEditor({ + filter, + index, + totalFilters, + disabled, + defaultFilterPrompt, + onMove, + onRemove, + onUpdate, + onToggle, +}: FilterItemEditorProps) { + const [isExpanded, setIsExpanded] = useState(false); + + const filterLabel = AVAILABLE_FILTER_TYPES.find((f) => f.type === filter.type)?.label || filter.type; + + return ( +
+ {/* Header Row */} +
+ {/* Order controls */} + {!disabled && ( +
+ + +
+ )} + + {/* Index badge */} + + {index + 1} + + + {/* Expand/collapse button */} + + + {/* Enable/Disable toggle */} + + + {/* Delete button */} + {!disabled && ( + + )} +
+ + {/* Expanded Settings */} + {isExpanded && ( +
+ {filter.type === "principle-of-charity" && ( + + )} + {filter.type === "supported-elsewhere" && ( + + )} + {filter.type === "severity" && ( + + )} + {filter.type === "confidence" && ( + + )} +
+ )} +
+ ); +} + +// ============================================================================ +// LLM Filter Settings (uses ModelConfigurator) +// ============================================================================ + +interface LLMFilterSettingsProps { + filter: SupportedElsewhereFilterConfig | PrincipleOfCharityFilterConfig; + disabled?: boolean; + defaultPrompt?: string; + onUpdate: (updates: Partial) => void; + description: string; + showCustomPrompt?: boolean; +} + +function LLMFilterSettings({ + filter, + disabled, + defaultPrompt, + onUpdate, + description, + showCustomPrompt, +}: LLMFilterSettingsProps) { + const customPrompt = "customPrompt" in filter ? filter.customPrompt : undefined; + + return ( +
+

{description}

+ + {/* Model Configuration using ModelConfigurator */} + + + {/* Custom Prompt (only for supported-elsewhere filter) */} + {showCustomPrompt && ( +
+
+ Custom Prompt + {customPrompt && ( + + )} +
+