From 56493db02ed9930dfc0773cf0898ab697e5113cb Mon Sep 17 00:00:00 2001 From: Arav Jain Date: Sun, 22 Feb 2026 18:31:57 -0600 Subject: [PATCH 1/2] Add round-robin benchmark feature Adds a systematic round-robin benchmark mode where every model plays against every other model (C(7,2) = 21 matchups with 7 models). Results are persisted to SQLite and displayed on a dedicated /bench page with overall rankings and a head-to-head matrix. New files: bench-db.ts, bench.ts, bench.html, bench.css, bench-frontend.tsx Modified: server.ts (routes + API), frontend.tsx (nav link), admin.tsx (controls) --- admin.tsx | 137 +++++++++ bench-db.ts | 144 +++++++++ bench-frontend.tsx | 727 +++++++++++++++++++++++++++++++++++++++++++++ bench.css | 687 ++++++++++++++++++++++++++++++++++++++++++ bench.html | 19 ++ bench.ts | 339 +++++++++++++++++++++ frontend.tsx | 3 + server.ts | 118 ++++++++ 8 files changed, 2174 insertions(+) create mode 100644 bench-db.ts create mode 100644 bench-frontend.tsx create mode 100644 bench.css create mode 100644 bench.html create mode 100644 bench.ts diff --git a/admin.tsx b/admin.tsx index 423c1d2..aa9ae4c 100644 --- a/admin.tsx +++ b/admin.tsx @@ -2,6 +2,13 @@ import React, { useEffect, useMemo, useState } from "react"; import { createRoot } from "react-dom/client"; import "./admin.css"; +type BenchSummary = { + id: string; + status: string; + totalRounds: number; + completedRounds: number; +} | null; + type AdminSnapshot = { isPaused: boolean; isRunningRound: boolean; @@ -9,6 +16,7 @@ type AdminSnapshot = { completedInMemory: number; persistedRounds: number; viewerCount: number; + bench?: BenchSummary; }; type AdminResponse = { ok: true } & AdminSnapshot; @@ -61,6 +69,7 @@ function App() { const [pending, setPending] = useState(null); const [isResetOpen, setIsResetOpen] = useState(false); const [resetText, setResetText] = useState(""); + const [benchRoundsPerPairing, setBenchRoundsPerPairing] = useState(1); useEffect(() => { let mounted = true; @@ -173,6 
+182,75 @@ function App() {
     }
   }
 
+  /**
+   * Asks the server to start a benchmark run.
+   *
+   * POSTs the selected rounds-per-pairing to /api/bench/start. On success the
+   * admin snapshot is re-fetched from /api/admin/status; on failure the error
+   * text is surfaced through setError. The "bench-start" pending marker is
+   * cleared once the request settles, whatever the outcome.
+   */
+  async function onBenchStart() {
+    setError(null);
+    setPending("bench-start");
+    try {
+      const response = await fetch("/api/bench/start", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ roundsPerPairing: benchRoundsPerPairing }),
+        cache: "no-store",
+      });
+      if (!response.ok) {
+        // Use the same error extraction as onBenchCancel so both bench
+        // actions report failures identically.
+        throw new Error(await readErrorMessage(response));
+      }
+      try {
+        const statusData = await requestAdminJson("/api/admin/status");
+        setSnapshot(statusData);
+      } catch {
+        // Best-effort refresh: the start request already succeeded, so a
+        // failed status fetch must not be reported as a failed start.
+      }
+    } catch (err) {
+      setError(err instanceof Error ? err.message : "Failed to start benchmark");
+    } finally {
+      setPending(null);
+    }
+  }
+
+  /**
+   * Asks the server to cancel the benchmark run.
+   *
+   * POSTs to /api/bench/cancel without a body. On success the admin snapshot
+   * is re-fetched from /api/admin/status; on failure the error text is surfaced
+   * through setError. The "bench-cancel" pending marker is always cleared.
+   */
+  async function onBenchCancel() {
+    setError(null);
+    setPending("bench-cancel");
+    try {
+      const response = await fetch("/api/bench/cancel", {
+        method: "POST",
+        cache: "no-store",
+      });
+      if (!response.ok) {
+        throw new Error(await readErrorMessage(response));
+      }
+      try {
+        const statusData = await requestAdminJson("/api/admin/status");
+        setSnapshot(statusData);
+      } catch {
+        // Best-effort refresh: the cancel request already succeeded, so a
+        // failed status fetch must not be reported as a failed cancel.
+      }
+    } catch (err) {
+      setError(err instanceof Error ? err.message : "Failed to cancel benchmark");
+    } finally {
+      setPending(null);
+    }
+  }
+
   async function onLogout() {
     setError(null);
     setPending("logout");
@@ -263,6 +341,7 @@ function App() {