diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 0000000..737d30b
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,64 @@
+# Quipbench
+
+Standalone benchmark runner + static dashboard for Quipslop models.
+
+## What it does
+
+- Runs live OpenRouter self-play rounds (same mechanics as the main game)
+- Computes a leaderboard ranked by Elo, with wins, games, and win rate per model
+- Stores run + match + rating records in `bench/quipbench.sqlite`
+- Exports latest snapshot to `bench/out/latest.json` and `bench/out/latest.js`
+- Renders a standalone dashboard at `bench/dashboard/index.html`
+
+## Prerequisites
+
+- Bun
+- `OPENROUTER_API_KEY` set in the environment for live runs
+
+## Commands
+
+From repo root:
+
+- `bun run quipbench:run`
+- `bun run quipbench:export`
+- `bun run quipbench:open`
+
+### Run options
+
+`quipbench:run` supports CLI flags:
+
+- `--rounds=100`
+- `--concurrency=4`
+- `--k=24`
+- `--initialElo=1500`
+- `--seed=12345`
+- `--out=bench/out`
+- `--db=bench/quipbench.sqlite`
+
+Example:
+
+```bash
+bun bench/run.ts --rounds=150 --concurrency=6 --seed=42
+```
+
+## Output contract (`latest` snapshot)
+
+`bench/out/latest.json` and `bench/out/latest.js` contain:
+
+- `runMeta`: `runId`, `startedAt`, `endedAt`, `roundsRequested`, `roundsCompleted`, `failures`, `concurrency`, `eloK`, `initialElo`, `seed`
+- `leaderboard[]`: `rank`, `modelId`, `modelName`, `elo`, `wins`, `games`, `winRate`
+- `chart[]`: `{ modelName, elo }`
+
+## Dashboard
+
+Open `bench/dashboard/index.html` directly or via:
+
+```bash
+bun run quipbench:open
+```
+
+The dashboard reads `../out/latest.js` (relative to `bench/dashboard/`, i.e. `bench/out/latest.js`) and shows:
+
+- run metadata summary
+- vertical Elo bar chart with model names under each bar
+- leaderboard table
diff --git a/bench/config.ts b/bench/config.ts
new file mode 100644
index 0000000..01b15ab
--- /dev/null
+++ b/bench/config.ts
@@ -0,0 +1,29 @@
+import { join } from "node:path";
+
+export const BENCH_DIR = import.meta.dir;
+
+export const DEFAULT_ROUNDS = 100;
+export const DEFAULT_CONCURRENCY = 4;
+export const DEFAULT_ELO_K = 24;
+export const DEFAULT_INITIAL_ELO = 1500;
+
+export const DEFAULT_DB_PATH = join(BENCH_DIR, "quipbench.sqlite");
+export const DEFAULT_OUTPUT_DIR = join(BENCH_DIR, "out");
+export const DEFAULT_LATEST_JSON_PATH = join(DEFAULT_OUTPUT_DIR, "latest.json");
+export const DEFAULT_LATEST_JS_PATH = join(DEFAULT_OUTPUT_DIR, "latest.js");
+
+/**
+ * Reads a strictly positive integer from an optional string.
+ *
+ * Parsing follows `Number.parseInt` with radix 10, so a leading numeric
+ * prefix is accepted (`"12abc"` yields `12`, `"3.9"` yields `3`).
+ *
+ * @param value - Raw text to parse; `undefined` is treated as an empty string.
+ * @param fallback - Returned when the parsed result is not finite or is not greater than zero.
+ * @returns The parsed integer, or `fallback`.
+ */
+export function parsePositiveInt(
+  value: string | undefined,
+  fallback: number,
+): number {
+  const candidate = Number.parseInt(value ?? "", 10);
+  if (!Number.isFinite(candidate) || candidate <= 0) {
+    return fallback;
+  }
+  return candidate;
+}
+
+/**
+ * Reads a strictly positive number (integer or fractional) from an optional string.
+ *
+ * Parsing follows `Number.parseFloat`, so a leading numeric prefix is
+ * accepted (`"24abc"` yields `24`).
+ *
+ * @param value - Raw text to parse; `undefined` is treated as an empty string.
+ * @param fallback - Returned when the parsed result is not finite or is not greater than zero.
+ * @returns The parsed number, or `fallback`.
+ */
+export function parsePositiveNumber(
+  value: string | undefined,
+  fallback: number,
+): number {
+  const candidate = Number.parseFloat(value ?? "");
+  if (!Number.isFinite(candidate) || candidate <= 0) {
+    return fallback;
+  }
+  return candidate;
+}
diff --git a/bench/dashboard/app.js b/bench/dashboard/app.js
new file mode 100644
index 0000000..42fa1f8
--- /dev/null
+++ b/bench/dashboard/app.js
@@ -0,0 +1,277 @@
+// Mount point for the dashboard: the element with id "app". The render
+// functions below replace its innerHTML.
+const root = document.getElementById("app");
+// Latest benchmark snapshot, read from a global.
+// NOTE(review): presumably assigned by `bench/out/latest.js` before this script runs; confirm in index.html.
+const snapshot = window.__QUIPBENCH_LATEST__;
+// Cache of logo URL -> Image, filled lazily by the chart icon plugin so each
+// logo image is created only once.
+const logoImageCache = new Map();
+
+function logoFor(name) {
+  if (name.includes("Gemini")) return "./assets/logos/gemini.svg";
+  if (name.includes("Kimi")) return "./assets/logos/kimi.svg";
+  if (name.includes("DeepSeek")) return "./assets/logos/deepseek.svg";
+  if (name.includes("GLM")) return "./assets/logos/glm.svg";
+  if (name.includes("GPT")) return "./assets/logos/openai.svg";
+  if (name.includes("Opus") || name.includes("Sonnet")) return "./assets/logos/claude.svg";
+  if (name.includes("Grok")) return "./assets/logos/grok.svg";
+  if (name.includes("MiniMax")) return "./assets/logos/minimax.svg";
+  return null;
+}
+
+function formatDate(ts) {
+  const date = new Date(ts);
+  if (Number.isNaN(date.getTime())) return ts;
+  return date.toLocaleString();
+}
+
+function rowHtml(row) {
+  const logo = logoFor(row.modelName);
+  return `
+    <tr>
+      <td class="mono rank">${row.rank}</td>
+      <td>
+        <div class="model-cell">
+          ${logo ? `<img src="${logo}" alt="" />` : ""}
+          <span>${row.modelName}</span>
+        </div>
+      </td>
+      <td class="mono">${row.elo.toFixed(2)}</td>
+      <td class="mono">${row.wins}</td>
+      <td class="mono">${row.games}</td>
+      <td class="mono">${row.winRate.toFixed(2)}%</td>
+    </tr>
+  `;
+}
+
+/**
+ * Draws the vertical Elo bar chart into the `#elo-chart` canvas.
+ *
+ * Does nothing when the canvas is missing, when the global `Chart` is not
+ * defined, or when `rows` is empty.
+ * NOTE(review): `Chart` is presumably the Chart.js global loaded by the page; confirm in index.html.
+ *
+ * @param {Array<{modelName: string, elo: number}>} rows Leaderboard rows; the
+ *   input array is not mutated (a sorted copy is used).
+ */
+function renderChart(rows) {
+  const chartCanvas = document.getElementById("elo-chart");
+  if (!chartCanvas || typeof Chart === "undefined" || rows.length === 0) return;
+
+  // Highest Elo first; labels and data share the same index order.
+  const sorted = [...rows].sort((a, b) => b.elo - a.elo);
+  const labels = sorted.map((row) => row.modelName);
+  const data = sorted.map((row) => Number(row.elo.toFixed(2)));
+  const max = Math.max(...data);
+  const min = Math.min(...data);
+  // Y axis spans from 20 below the lowest Elo to 10 above the highest,
+  // rounded outward to whole numbers.
+  const yMin = Math.floor(min - 20);
+  const yMax = Math.ceil(max + 10);
+
+  // Inline plugin that paints each model's logo above its bar once the
+  // datasets have been drawn.
+  const iconPlugin = {
+    id: "barIcons",
+    afterDatasetsDraw(chart) {
+      const { ctx } = chart;
+      const meta = chart.getDatasetMeta(0);
+      // Pixel position of the top of the y axis; used to keep icons inside the plot.
+      const topY = chart.scales.y.getPixelForValue(yMax);
+
+      meta.data.forEach((bar, index) => {
+        const modelName = labels[index];
+        const iconUrl = logoFor(modelName);
+        if (!iconUrl) return;
+
+        // Create each logo image once and reuse it from the shared cache.
+        let img = logoImageCache.get(iconUrl);
+        if (!img) {
+          img = new Image();
+          img.src = iconUrl;
+          // Redraw when the image finishes loading so the icon appears.
+          img.onload = () => chart.draw();
+          logoImageCache.set(iconUrl, img);
+        }
+        // Skip this pass if the image is not loaded (or failed to load).
+        if (!img.complete || !img.naturalWidth) return;
+
+        const iconSize = 18;
+        // Center horizontally on the bar; sit 6px above the bar but never
+        // closer than 4px to the top of the axis.
+        const x = bar.x - iconSize / 2;
+        const y = Math.max(topY + 4, bar.y - iconSize - 6);
+
+        ctx.save();
+        ctx.fillStyle = "#0a0a0a";
+        ctx.strokeStyle = "#2a2a2a";
+        ctx.lineWidth = 1;
+        // Dark backing plate with 3px padding; rounded where roundRect is
+        // available, plain rectangle otherwise.
+        if (typeof ctx.roundRect === "function") {
+          ctx.beginPath();
+          ctx.roundRect(x - 3, y - 3, iconSize + 6, iconSize + 6, 6);
+          ctx.fill();
+          ctx.stroke();
+        } else {
+          ctx.fillRect(x - 3, y - 3, iconSize + 6, iconSize + 6);
+          ctx.strokeRect(x - 3, y - 3, iconSize + 6, iconSize + 6);
+        }
+        ctx.drawImage(img, x, y, iconSize, iconSize);
+        ctx.restore();
+      });
+    },
+  };
+
+  new Chart(chartCanvas, {
+    type: "bar",
+    data: {
+      labels,
+      datasets: [
+        {
+          label: "Elo",
+          data,
+          borderWidth: 1,
+          borderColor: "#3c2018",
+          // Eight-step palette, lightest first.
+          backgroundColor: [
+            "#e8ab97",
+            "#e09a81",
+            "#d98367",
+            "#d97757",
+            "#ca6b4b",
+            "#bc6141",
+            "#ae5637",
+            "#9f4b2d",
+          ],
+          borderRadius: 6,
+          maxBarThickness: 72,
+        },
+      ],
+    },
+    options: {
+      responsive: true,
+      maintainAspectRatio: false,
+      plugins: {
+        legend: { display: false },
+        tooltip: {
+          displayColors: false,
+          backgroundColor: "#101010",
+          borderColor: "#2d2d2d",
+          borderWidth: 1,
+          titleColor: "#f0f0f0",
+          bodyColor: "#d4d4d4",
+          callbacks: {
+            // Tooltip body shows the Elo with two decimals.
+            label(context) {
+              return `Elo ${Number(context.raw).toFixed(2)}`;
+            },
+          },
+        },
+      },
+      scales: {
+        x: {
+          ticks: {
+            color: "#b8b8b8",
+            maxRotation: 0,
+            autoSkip: false,
+            font: { family: "JetBrains Mono", size: 11 },
+          },
+          grid: { color: "rgba(255,255,255,0.04)" },
+        },
+        y: {
+          min: yMin,
+          max: yMax,
+          ticks: {
+            color: "#8b8b8b",
+            font: { family: "JetBrains Mono", size: 11 },
+          },
+          grid: { color: "rgba(255,255,255,0.07)" },
+        },
+      },
+    },
+    plugins: [iconPlugin],
+  });
+}
+
+function renderEmpty() {
+  root.innerHTML = `
+    <main class="shell">
+      <header class="header">
+        <div class="brand">
+          <img src="./assets/logo.svg" alt="Quipbench" />
+          <h1>Quipbench</h1>
+        </div>
+      </header>
+      <section class="panel">
+        <h2>No snapshot found</h2>
+        <p>Run a benchmark first: <code>bun run quipbench:run</code></p>
+        <p>Then refresh this page. Snapshot expected at <code>bench/out/latest.js</code>.</p>
+      </section>
+    </main>
+  `;
+}
+
+function render(snapshotData) {
+  const meta = snapshotData.runMeta;
+  const leaderboard = snapshotData.leaderboard;
+  const champion = leaderboard[0];
+
+  root.innerHTML = `
+    <main class="shell">
+      <header class="header">
+        <div class="brand">
+          <img src="./assets/logo.svg" alt="Quipbench" />
+          <h1>Quipbench</h1>
+        </div>
+        <div class="header-links">
+          <a href="https://github.com/T3-Content/quipslop" target="_blank" rel="noreferrer">
+            <svg viewBox="0 0 24 24" aria-hidden="true">
+              <path d="M12 .5a12 12 0 0 0-3.79 23.39c.6.11.82-.26.82-.58v-2.05c-3.34.73-4.04-1.41-4.04-1.41-.55-1.36-1.33-1.72-1.33-1.72-1.09-.73.08-.72.08-.72 1.2.09 1.83 1.2 1.83 1.2 1.07 1.79 2.81 1.27 3.49.97.11-.75.42-1.27.76-1.56-2.67-.3-5.47-1.31-5.47-5.84 0-1.29.47-2.35 1.24-3.18-.13-.3-.54-1.52.12-3.16 0 0 1.01-.32 3.3 1.21a11.63 11.63 0 0 1 6 0c2.28-1.53 3.29-1.21 3.29-1.21.66 1.64.25 2.86.12 3.16.77.83 1.24 1.89 1.24 3.18 0 4.54-2.8 5.54-5.48 5.84.43.37.81 1.09.81 2.21v3.27c0 .32.22.7.83.58A12 12 0 0 0 12 .5Z"/>
+            </svg>
+            <span>GitHub</span>
+          </a>
+          <a href="https://quipslop.com/" target="_blank" rel="noreferrer">
+            <svg viewBox="0 0 24 24" aria-hidden="true">
+              <path d="M12 2a10 10 0 1 0 10 10A10 10 0 0 0 12 2Zm7.88 9h-3.07a15.9 15.9 0 0 0-1.18-5A8.04 8.04 0 0 1 19.88 11ZM12 4.04c1.04 1.17 1.92 3.24 2.35 5.96H9.65C10.08 7.28 10.96 5.21 12 4.04ZM4.12 13h3.07a15.9 15.9 0 0 0 1.18 5A8.04 8.04 0 0 1 4.12 13Zm3.07-2H4.12a8.04 8.04 0 0 1 4.25-5 15.9 15.9 0 0 0-1.18 5ZM12 19.96c-1.04-1.17-1.92-3.24-2.35-5.96h4.7c-.43 2.72-1.31 4.79-2.35 5.96ZM14.57 13H9.43a14.4 14.4 0 0 1 0-2h5.14a14.4 14.4 0 0 1 0 2Zm1.06 5a15.9 15.9 0 0 0 1.18-5h3.07a8.04 8.04 0 0 1-4.25 5Z"/>
+            </svg>
+            <span>Website</span>
+          </a>
+        </div>
+        <div class="meta-pills mono">
+          <span class="pill">Run ${meta.runId}</span>
+          <span class="pill">${meta.roundsCompleted}/${meta.roundsRequested} rounds</span>
+          <span class="pill">${meta.failures} failures</span>
+        </div>
+      </header>
+
+      <section class="panel panel--summary">
+        <div class="summary-block">
+          <div class="summary-label mono">Started</div>
+          <div class="summary-value">${formatDate(meta.startedAt)}</div>
+        </div>
+        <div class="summary-block">
+          <div class="summary-label mono">Ended</div>
+          <div class="summary-value">${formatDate(meta.endedAt)}</div>
+        </div>
+        <div class="summary-block">
+          <div class="summary-label mono">Champion</div>
+          <div class="summary-value">${champion ? champion.modelName : "-"}</div>
+        </div>
+        <div class="summary-block">
+          <div class="summary-label mono">Seed</div>
+          <div class="summary-value mono">${meta.seed}</div>
+        </div>
+      </section>
+
+      <section class="panel">
+        <div class="panel-head">
+          <h2>Elo Leaderboard</h2>
+        </div>
+        <div class="chart-shell">
+          <canvas id="elo-chart" aria-label="Elo leaderboard bar chart"></canvas>
+        </div>
+      </section>
+
+      <section class="panel">
+        <div class="panel-head">
+          <h2>Leaderboard Table</h2>
+        </div>
+        <div class="table-wrap">
+          <table>
+            <thead>
+              <tr>
+                <th class="mono">#</th>
+                <th>Model</th>
+                <th class="mono">Elo</th>
+                <th class="mono">Wins</th>
+                <th class="mono">Games</th>
+                <th class="mono">Win Rate</th>
+              </tr>
+            </thead>
+            <tbody>
+              ${leaderboard.map(rowHtml).join("")}
+            </tbody>
+          </table>
+        </div>
+      </section>
+    </main>
+  `;
+}
+
+if (!snapshot || !snapshot.leaderboard) {
+  renderEmpty();
+} else {
+  render(snapshot);
+  renderChart(snapshot.leaderboard);
+}
diff --git a/bench/dashboard/app.ts b/bench/dashboard/app.ts
new file mode 100644
index 0000000..8997c7e
--- /dev/null
+++ b/bench/dashboard/app.ts
@@ -0,0 +1,3 @@
+// Source shim to satisfy TypeScript-based workflows.
+// The standalone dashboard intentionally runs plain JS for direct browser compatibility.
+import "./app.js";
diff --git a/bench/dashboard/assets/logo.svg b/bench/dashboard/assets/logo.svg
new file mode 100644
index 0000000..719d15b
--- /dev/null
+++ b/bench/dashboard/assets/logo.svg
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="1200pt" height="1200pt" version="1.1" viewBox="0 0 1200 1200" xmlns="http://www.w3.org/2000/svg" fill="#ededed">
+ <path d="m900 1200h-600c-110.3 0-200-89.699-200-200v-600c0-110.3 89.699-200 200-200h600c110.3 0 200 89.699 200 200v600c0 110.3-89.699 200-200 200zm-600-950c-82.699 0-150 67.301-150 150v600c0 82.699 67.301 150 150 150h600c82.699 0 150-67.301 150-150v-600c0-82.699-67.301-150-150-150z"/>
+ <path d="m925 1050h-650c-13.801 0-25-11.25-25-25v-225c0-13.75 11.199-25 25-25s25 11.25 25 25v200h600v-200c0-13.75 11.25-25 25-25s25 11.25 25 25v225c0 13.75-11.25 25-25 25z"/>
+ <path d="m525 650h-150c-13.801 0-25-11.25-25-25v-150c0-13.801 11.199-25 25-25h150c13.801 0 25 11.199 25 25v150c0 13.75-11.199 25-25 25zm-125-50h100v-100h-100z"/>
+ <path d="m825 650h-150c-13.75 0-25-11.25-25-25v-150c0-13.801 11.25-25 25-25h150c13.75 0 25 11.199 25 25v150c0 13.75-11.25 25-25 25zm-125-50h100v-100h-100z"/>
+ <path d="m475 150h-100c-13.801 0-25-11.199-25-25v-100c0-13.801 11.199-25 25-25h100c13.801 0 25 11.199 25 25v100c0 13.801-11.199 25-25 25zm-75-50h50v-50h-50z"/>
+ <path d="m650 225c-13.75 0-25-11.199-25-25 0-68.898-56.102-125-125-125-13.801 0-25-11.199-25-25s11.199-25 25-25c96.5 0 175 78.5 175 175 0 13.801-11.25 25-25 25z"/>
+</svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/claude.svg b/bench/dashboard/assets/logos/claude.svg
new file mode 100644
index 0000000..d300701
--- /dev/null
+++ b/bench/dashboard/assets/logos/claude.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="256" height="257" preserveAspectRatio="xMidYMid" viewBox="0 0 256 257"><path fill="#D97757" d="m50.228 170.321 50.357-28.257.843-2.463-.843-1.361h-2.462l-8.426-.518-28.775-.778-24.952-1.037-24.175-1.296-6.092-1.297L0 125.796l.583-3.759 5.12-3.434 7.324.648 16.202 1.101 24.304 1.685 17.629 1.037 26.118 2.722h4.148l.583-1.685-1.426-1.037-1.101-1.037-25.147-17.045-27.22-18.017-14.258-10.37-7.713-5.25-3.888-4.925-1.685-10.758 7-7.713 9.397.649 2.398.648 9.527 7.323 20.35 15.75L94.817 91.9l3.889 3.24 1.555-1.102.195-.777-1.75-2.917-14.453-26.118-15.425-26.572-6.87-11.018-1.814-6.61c-.648-2.723-1.102-4.991-1.102-7.778l7.972-10.823L71.42 0 82.05 1.426l4.472 3.888 6.61 15.101 10.694 23.786 16.591 32.34 4.861 9.592 2.592 8.879.973 2.722h1.685v-1.556l1.36-18.211 2.528-22.36 2.463-28.776.843-8.1 4.018-9.722 7.971-5.25 6.222 2.981 5.12 7.324-.713 4.73-3.046 19.768-5.962 30.98-3.889 20.739h2.268l2.593-2.593 10.499-13.934 17.628-22.036 7.778-8.749 9.073-9.657 5.833-4.601h11.018l8.1 12.055-3.628 12.443-11.342 14.388-9.398 12.184-13.48 18.147-8.426 14.518.778 1.166 2.01-.194 30.46-6.481 16.462-2.982 19.637-3.37 8.88 4.148.971 4.213-3.5 8.62-20.998 5.184-24.628 4.926-36.682 8.685-.454.324.519.648 16.526 1.555 7.065.389h17.304l32.21 2.398 8.426 5.574 5.055 6.805-.843 5.184-12.962 6.611-17.498-4.148-40.83-9.721-14-3.5h-1.944v1.167l11.666 11.406 21.387 19.314 26.767 24.887 1.36 6.157-3.434 4.86-3.63-.518-23.526-17.693-9.073-7.972-20.545-17.304h-1.36v1.814l4.73 6.935 25.017 37.59 1.296 11.536-1.814 3.76-6.481 2.268-7.13-1.297-14.647-20.544-15.1-23.138-12.185-20.739-1.49.843-7.194 77.448-3.37 3.953-7.778 2.981-6.48-4.925-3.436-7.972 3.435-15.749 4.148-20.544 3.37-16.333 3.046-20.285 1.815-6.74-.13-.454-1.49.194-15.295 20.999-23.267 31.433-18.406 19.702-4.407 1.75-7.648-3.954.713-7.064 4.277-6.286 25.47-32.405 15.36-20.092 9.917-11.6-.065-1.686h-.583L44.07 198.125l-12.055 1.555-5.185-4.86.648-7.972 2.463-2.593 20.35-13.999-.064.065Z"/></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/deepseek.svg b/bench/dashboard/assets/logos/deepseek.svg
new file mode 100644
index 0000000..1401c17
--- /dev/null
+++ b/bench/dashboard/assets/logos/deepseek.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" style="flex:none;line-height:1" viewBox="0 0 24 24"><path fill="#4D6BFE" d="M23.748 4.482c-.254-.124-.364.113-.512.234-.051.039-.094.09-.137.136-.372.397-.806.657-1.373.626-.829-.046-1.537.214-2.163.848-.133-.782-.575-1.248-1.247-1.548-.352-.156-.708-.311-.955-.65-.172-.241-.219-.51-.305-.774-.055-.16-.11-.323-.293-.35-.2-.031-.278.136-.356.276-.313.572-.434 1.202-.422 1.84.027 1.436.633 2.58 1.838 3.393.137.093.172.187.129.323-.082.28-.18.552-.266.833-.055.179-.137.217-.329.14a5.526 5.526 0 0 1-1.736-1.18c-.857-.828-1.631-1.742-2.597-2.458a11.365 11.365 0 0 0-.689-.471c-.985-.957.13-1.743.388-1.836.27-.098.093-.432-.779-.428-.872.004-1.67.295-2.687.684a3.055 3.055 0 0 1-.465.137 9.597 9.597 0 0 0-2.883-.102c-1.885.21-3.39 1.102-4.497 2.623C.082 8.606-.231 10.684.152 12.85c.403 2.284 1.569 4.175 3.36 5.653 1.858 1.533 3.997 2.284 6.438 2.14 1.482-.085 3.133-.284 4.994-1.86.47.234.962.327 1.78.397.63.059 1.236-.03 1.705-.128.735-.156.684-.837.419-.961-2.155-1.004-1.682-.595-2.113-.926 1.096-1.296 2.746-2.642 3.392-7.003.05-.347.007-.565 0-.845-.004-.17.035-.237.23-.256a4.173 4.173 0 0 0 1.545-.475c1.396-.763 1.96-2.015 2.093-3.517.02-.23-.004-.467-.247-.588zM11.581 18c-2.089-1.642-3.102-2.183-3.52-2.16-.392.024-.321.471-.235.763.09.288.207.486.371.739.114.167.192.416-.113.603-.673.416-1.842-.14-1.897-.167-1.361-.802-2.5-1.86-3.301-3.307-.774-1.393-1.224-2.887-1.298-4.482-.02-.386.093-.522.477-.592a4.696 4.696 0 0 1 1.529-.039c2.132.312 3.946 1.265 5.468 2.774.868.86 1.525 1.887 2.202 2.891.72 1.066 1.494 2.082 2.48 2.914.348.292.625.514.891.677-.802.09-2.14.11-3.054-.614zm1-6.44a.306.306 0 0 1 .415-.287.302.302 0 0 1 .2.288.306.306 0 0 1-.31.307.303.303 0 0 1-.304-.308zm3.11 1.596c-.2.081-.399.151-.59.16a1.245 1.245 0 0 1-.798-.254c-.274-.23-.47-.358-.552-.758a1.73 1.73 0 0 1 .016-.588c.07-.327-.008-.537-.239-.727-.187-.156-.426-.199-.688-.199a.559.559 0 0 1-.254-.078.253.253 0 0 
1-.114-.358c.028-.054.16-.186.192-.21.356-.202.767-.136 1.146.016.352.144.618.408 1.001.782.391.451.462.576.685.914.176.265.336.537.445.848.067.195-.019.354-.25.452z"/></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/gemini.svg b/bench/dashboard/assets/logos/gemini.svg
new file mode 100644
index 0000000..87cce06
--- /dev/null
+++ b/bench/dashboard/assets/logos/gemini.svg
@@ -0,0 +1 @@
+<svg viewBox="0 0 296 298" xmlns="http://www.w3.org/2000/svg" width="296" height="298" fill="none"><mask id="gemini__a" width="296" height="298" x="0" y="0" maskUnits="userSpaceOnUse" style="mask-type:alpha"><path fill="#3186FF" d="M141.201 4.886c2.282-6.17 11.042-6.071 13.184.148l5.985 17.37a184.004 184.004 0 0 0 111.257 113.049l19.304 6.997c6.143 2.227 6.156 10.91.02 13.155l-19.35 7.082a184.001 184.001 0 0 0-109.495 109.385l-7.573 20.629c-2.241 6.105-10.869 6.121-13.133.025l-7.908-21.296a184 184 0 0 0-109.02-108.658l-19.698-7.239c-6.102-2.243-6.118-10.867-.025-13.132l20.083-7.467A183.998 183.998 0 0 0 133.291 26.28l7.91-21.394Z"/></mask><g mask="url(#gemini__a)"><g filter="url(#gemini__b)"><ellipse cx="163" cy="149" fill="#3689FF" rx="196" ry="159"/></g><g filter="url(#gemini__c)"><ellipse cx="33.5" cy="142.5" fill="#F6C013" rx="68.5" ry="72.5"/></g><g filter="url(#gemini__d)"><ellipse cx="19.5" cy="148.5" fill="#F6C013" rx="68.5" ry="72.5"/></g><g filter="url(#gemini__e)"><path fill="#FA4340" d="M194 10.5C172 82.5 65.5 134.333 22.5 135L144-66l50 76.5Z"/></g><g filter="url(#gemini__f)"><path fill="#FA4340" d="M190.5-12.5C168.5 59.5 62 111.333 19 112L140.5-89l50 76.5Z"/></g><g filter="url(#gemini__g)"><path fill="#14BB69" d="M194.5 279.5C172.5 207.5 66 155.667 23 155l121.5 201 50-76.5Z"/></g><g filter="url(#gemini__h)"><path fill="#14BB69" d="M196.5 320.5C174.5 248.5 68 196.667 25 196l121.5 201 50-76.5Z"/></g></g><defs><filter id="gemini__b" width="464" height="390" x="-69" y="-46" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="18"/></filter><filter id="gemini__c" width="265" height="273" x="-99" y="6" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" 
in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter><filter id="gemini__d" width="265" height="273" x="-113" y="12" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter><filter id="gemini__e" width="299.5" height="329" x="-41.5" y="-130" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter><filter id="gemini__f" width="299.5" height="329" x="-45" y="-153" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter><filter id="gemini__g" width="299.5" height="329" x="-41" y="91" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter><filter id="gemini__h" width="299.5" height="329" x="-39" y="132" color-interpolation-filters="sRGB" filterUnits="userSpaceOnUse"><feFlood flood-opacity="0" result="BackgroundImageFix"/><feBlend in="SourceGraphic" in2="BackgroundImageFix" result="shape"/><feGaussianBlur result="effect1_foregroundBlur_69_17998" stdDeviation="32"/></filter></defs></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/glm.svg b/bench/dashboard/assets/logos/glm.svg
new file mode 100644
index 0000000..28ca728
--- /dev/null
+++ b/bench/dashboard/assets/logos/glm.svg
@@ -0,0 +1,215 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0.0 0.0 30.0 30.0" style="enable-background:new 0 0 30 30;" xml:space="preserve" width="316.22776601683796" height="316.22776601683796">
+<style type="text/css">
+	.st0{opacity:0.3;fill:#E2E4E7;}
+	.st1{opacity:0.8;fill:#E2E4E7;stroke:#FFFFFF;stroke-width:5;stroke-miterlimit:10;}
+	.st2{fill:url(#SVGID_1_);}
+	.st3{fill:none;stroke:#E0E4E9;stroke-width:0.25;stroke-miterlimit:10;}
+	.st4{fill:none;}
+	.st5{fill:#9DA1A5;}
+	.st6{fill-rule:evenodd;clip-rule:evenodd;fill:none;}
+	.st7{fill-rule:evenodd;clip-rule:evenodd;fill:#DFE2E7;}
+	.st8{fill-rule:evenodd;clip-rule:evenodd;fill:#CDD4DA;}
+	.st9{fill-rule:evenodd;clip-rule:evenodd;fill:#B3BCC7;}
+	.st10{fill-rule:evenodd;clip-rule:evenodd;fill:#9DAAB7;}
+	.st11{fill-rule:evenodd;clip-rule:evenodd;fill:#8698A8;}
+	.st12{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_2_);}
+	.st13{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_3_);}
+	.st14{fill:#1F63EC;}
+	.st15{fill:#2D2D2D;}
+	.st16{fill:none;stroke:#E0E4E9;stroke-width:0.5;stroke-miterlimit:10;}
+	.st17{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_4_);}
+	.st18{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_5_);}
+	.st19{fill:none;stroke:#677380;stroke-width:0.5;stroke-miterlimit:10;}
+	.st20{fill:none;stroke:url(#SVGID_6_);stroke-width:2;stroke-miterlimit:10;}
+	.st21{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_7_);}
+	.st22{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_8_);}
+	.st23{fill:#FFFFFF;}
+	.st24{fill-rule:evenodd;clip-rule:evenodd;fill:#2D2D2D;}
+	.st25{clip-path:url(#SVGID_10_);}
+	.st26{clip-path:url(#SVGID_12_);}
+	.st27{fill:url(#SVGID_13_);}
+	.st28{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_14_);}
+	.st29{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_15_);}
+	.st30{clip-path:url(#SVGID_17_);}
+	.st31{clip-path:url(#SVGID_19_);}
+	.st32{fill:url(#SVGID_20_);}
+	.st33{fill:none;stroke:url(#SVGID_21_);stroke-width:2;stroke-miterlimit:10;}
+	.st34{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_22_);}
+	.st35{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_23_);}
+	.st36{clip-path:url(#SVGID_25_);}
+	.st37{clip-path:url(#SVGID_27_);}
+	.st38{fill:url(#SVGID_28_);}
+	.st39{clip-path:url(#SVGID_30_);}
+	.st40{clip-path:url(#SVGID_32_);}
+	.st41{fill:url(#SVGID_33_);}
+	.st42{fill-rule:evenodd;clip-rule:evenodd;fill:#126EF6;}
+	.st43{fill-rule:evenodd;clip-rule:evenodd;fill:#FFFFFF;}
+	.st44{clip-path:url(#SVGID_35_);}
+	.st45{clip-path:url(#SVGID_37_);}
+	.st46{fill:url(#SVGID_38_);}
+	.st47{fill-rule:evenodd;clip-rule:evenodd;fill:#9DA1A5;}
+	.st48{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_39_);}
+	.st49{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_40_);}
+	.st50{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_41_);}
+	.st51{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_42_);}
+	.st52{fill:none;stroke:url(#SVGID_43_);stroke-width:2;stroke-miterlimit:10;}
+	.st53{fill-rule:evenodd;clip-rule:evenodd;fill:none;stroke:#E0E4E9;stroke-width:0.5;stroke-miterlimit:10;}
+	.st54{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_44_);}
+	.st55{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_45_);}
+	.st56{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_46_);}
+	.st57{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_47_);}
+	.st58{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_48_);}
+	.st59{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_49_);}
+	.st60{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_50_);}
+	.st61{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_51_);}
+	.st62{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_52_);}
+	.st63{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_53_);}
+	.st64{clip-path:url(#SVGID_55_);}
+	.st65{clip-path:url(#SVGID_57_);}
+	.st66{fill:url(#SVGID_58_);}
+	.st67{clip-path:url(#SVGID_60_);}
+	.st68{clip-path:url(#SVGID_62_);}
+	.st69{fill:url(#SVGID_63_);}
+	.st70{fill:none;stroke:url(#SVGID_64_);stroke-width:2;stroke-miterlimit:10;}
+	.st71{clip-path:url(#SVGID_66_);}
+	.st72{clip-path:url(#SVGID_68_);}
+	.st73{fill:url(#SVGID_69_);}
+	.st74{clip-path:url(#SVGID_71_);}
+	.st75{clip-path:url(#SVGID_73_);}
+	.st76{fill:url(#SVGID_74_);}
+	.st77{clip-path:url(#SVGID_76_);}
+	.st78{clip-path:url(#SVGID_78_);}
+	.st79{fill:url(#SVGID_79_);}
+	.st80{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_80_);}
+	.st81{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_81_);}
+	.st82{clip-path:url(#SVGID_83_);}
+	.st83{clip-path:url(#SVGID_85_);}
+	.st84{fill:url(#SVGID_86_);}
+	.st85{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_87_);}
+	.st86{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_88_);}
+	.st87{clip-path:url(#SVGID_90_);}
+	.st88{clip-path:url(#SVGID_92_);}
+	.st89{fill:url(#SVGID_93_);}
+	.st90{fill:none;stroke:url(#SVGID_94_);stroke-width:2;stroke-miterlimit:10;}
+	.st91{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_95_);}
+	.st92{fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_96_);}
+	.st93{clip-path:url(#SVGID_98_);}
+	.st94{clip-path:url(#SVGID_100_);}
+	.st95{fill:url(#SVGID_101_);}
+	.st96{clip-path:url(#SVGID_103_);}
+	.st97{clip-path:url(#SVGID_105_);}
+	.st98{fill:url(#SVGID_106_);}
+	.st99{clip-path:url(#SVGID_108_);}
+	.st100{clip-path:url(#SVGID_110_);}
+	.st101{fill:url(#SVGID_111_);}
+	.st102{fill:#FFFFFF;stroke:#B3BCC7;stroke-width:0.275;stroke-miterlimit:10;}
+	.st103{clip-path:url(#SVGID_113_);}
+	.st104{fill:#FDD138;}
+	.st105{fill:#FCA62F;}
+	.st106{fill:#FB7927;}
+	.st107{fill:#F44B22;}
+	.st108{fill:#D81915;}
+	.st109{fill:#2D2D2D;stroke:#FFFFFF;stroke-width:0.3354;stroke-miterlimit:10;}
+	.st110{fill:none;stroke:#65727F;stroke-width:2;stroke-miterlimit:10;}
+	.st111{fill:none;stroke:#65727F;stroke-width:0.75;stroke-miterlimit:10;}
+	.st112{fill:url(#SVGID_114_);}
+	.st113{fill:#D06C50;}
+	.st114{fill:#2D2D2D;stroke:#B3BCC7;stroke-width:0.275;stroke-miterlimit:10;}
+	.st115{opacity:0.2;}
+	.st116{fill:none;stroke:#677380;stroke-width:0.3564;stroke-miterlimit:10;}
+	.st117{fill:none;stroke:#677380;stroke-width:0.3564;stroke-miterlimit:10;stroke-dasharray:1.0212,1.0212;}
+	.st118{fill:none;stroke:#677380;stroke-width:0.3564;stroke-miterlimit:10;stroke-dasharray:1.0205,1.0205;}
+	.st119{opacity:0.2;fill:none;}
+	.st120{fill:none;stroke:#677380;stroke-width:0.3689;stroke-miterlimit:10;}
+	.st121{fill:none;stroke:#677380;stroke-width:0.3689;stroke-miterlimit:10;stroke-dasharray:1.0509,1.0509;}
+	.st122{opacity:0.3;fill:#1F63EC;}
+	.st123{fill:#2D2D2D;stroke:#FFFFFF;stroke-width:0.3162;stroke-miterlimit:10;}
+	.st124{fill:#FFFFFF;stroke:#B3BCC7;stroke-width:0.3162;stroke-miterlimit:10;}
+	.st125{clip-path:url(#SVGID_118_);}
+	.st126{fill:url(#SVGID_119_);}
+	.st127{fill:none;stroke:#DFE2E7;stroke-width:0.75;stroke-miterlimit:10;}
+	.st128{fill:#9DA1A5;stroke:#FFFFFF;stroke-miterlimit:10;}
+	.st129{fill:url(#SVGID_120_);}
+	.st130{fill:none;stroke:#677380;stroke-width:0.75;stroke-miterlimit:10;}
+	.st131{opacity:0.4;}
+	.st132{clip-path:url(#SVGID_122_);}
+	.st133{clip-path:url(#SVGID_124_);}
+	.st134{fill:url(#SVGID_125_);}
+	.st135{fill:none;stroke:#8392A3;stroke-width:0.35;stroke-miterlimit:10;}
+	.st136{fill:none;stroke:#8392A3;stroke-width:0.35;stroke-miterlimit:10;stroke-dasharray:0.9951,0.9951;}
+	.st137{fill:none;stroke:#8392A3;stroke-width:0.35;stroke-miterlimit:10;stroke-dasharray:1.004,1.004;}
+	.st138{fill:none;stroke:url(#SVGID_126_);stroke-width:1.5;stroke-miterlimit:10;}
+	.st139{fill:url(#SVGID_127_);}
+	.st140{fill:none;stroke:#DDE0E4;stroke-width:0.35;stroke-miterlimit:10;}
+	.st141{fill:#2D2D2D;stroke:#A9B3BE;stroke-width:0.275;stroke-miterlimit:10;}
+	.st142{fill-rule:evenodd;clip-rule:evenodd;fill:#126EF4;}
+	.st143{fill:#FFFFFF;stroke:#B1BAC4;stroke-width:0.275;stroke-miterlimit:10;}
+	.st144{fill:#CE6C50;}
+	.st145{fill:#5B5B5B;}
+	.st146{fill:#8392A3;}
+	.st147{fill:none;stroke:url(#SVGID_128_);stroke-width:1.5;stroke-miterlimit:10;}
+	.st148{fill:url(#SVGID_129_);}
+	.st149{fill:none;stroke:#B5BDC4;stroke-width:0.7;stroke-miterlimit:10;}
+	.st150{opacity:0.6;fill:none;stroke:#78838E;stroke-width:0.35;stroke-miterlimit:10;}
+	.st151{opacity:0.2;fill:none;stroke:#8392A3;stroke-width:0.35;stroke-miterlimit:10;stroke-dasharray:1,1;}
+	.st152{fill:none;stroke:#DDE0E4;stroke-width:0.75;stroke-miterlimit:10;}
+	.st153{fill:none;stroke:#8392A3;stroke-width:0.5;stroke-miterlimit:10;}
+	.st154{opacity:0.2;fill:none;stroke:#677380;stroke-width:0.3564;stroke-miterlimit:10;stroke-dasharray:1.0182,1.0182;}
+	.st155{fill:none;stroke:#DDE0E4;stroke-width:0.765;stroke-miterlimit:10;}
+	.st156{fill:url(#SVGID_130_);}
+	.st157{fill:url(#SVGID_131_);}
+	.st158{fill:#B1BAC4;}
+	.st159{fill:#CBD1D8;}
+	.st160{fill:#0B1B2B;}
+	.st161{fill:#91D119;}
+	.st162{opacity:0.7;}
+	.st163{fill:#FFFFFF;stroke:#000000;stroke-width:0.4418;stroke-miterlimit:10;}
+	.st164{fill:none;stroke:#939CAA;stroke-width:0.2209;stroke-miterlimit:10;}
+	.st165{fill:none;stroke:#FFFFFF;stroke-width:3.0924;stroke-miterlimit:10;}
+	.st166{fill:url(#SVGID_132_);}
+	.st167{fill:none;stroke:url(#SVGID_133_);stroke-width:1.714;stroke-miterlimit:10;}
+	.st168{fill:url(#SVGID_134_);}
+	.st169{fill:url(#SVGID_135_);}
+	.st170{fill:url(#SVGID_136_);}
+	.st171{fill:url(#SVGID_137_);}
+	.st172{fill:url(#SVGID_138_);}
+	.st173{fill:url(#SVGID_139_);}
+	.st174{fill:url(#SVGID_140_);}
+	.st175{fill:url(#SVGID_141_);}
+	.st176{fill:url(#SVGID_142_);}
+	.st177{fill:url(#SVGID_143_);}
+	.st178{fill:url(#SVGID_144_);}
+	.st179{fill:none;stroke:#1F63EC;stroke-width:4;stroke-miterlimit:10;}
+	.st180{fill:none;stroke:#0B1B2B;stroke-width:4;stroke-miterlimit:10;}
+	.st181{fill:none;stroke:#677380;stroke-width:0.3989;stroke-miterlimit:10;}
+	.st182{fill:none;stroke:#677380;stroke-width:0.3989;stroke-miterlimit:10;stroke-dasharray:1.14,1.14;}
+	.st183{fill:#257AF1;}
+	.st184{opacity:0.3;fill:#FFFFFF;}
+	.st185{fill:none;stroke:#98A5B2;stroke-width:4;stroke-miterlimit:10;}
+	.st186{fill:none;stroke:#65727F;stroke-width:0.3989;stroke-miterlimit:10;}
+	.st187{fill:none;stroke:#65727F;stroke-width:0.3989;stroke-miterlimit:10;stroke-dasharray:1.14,1.14;}
+	.st188{fill:none;stroke:#DDDFE4;stroke-width:0.75;stroke-miterlimit:10;}
+	.st189{fill:#9A9EA2;}
+	.st190{fill-rule:evenodd;clip-rule:evenodd;fill:#3267AC;}
+	.st191{fill:#FFFFFF;stroke:#AFB8C3;stroke-width:0.275;stroke-miterlimit:10;}
+	.st192{fill:#C5694E;}
+	.st193{fill:#8192A2;}
+	.st194{fill:#2D2D2D;stroke:#FFFFFF;stroke-width:0.6317;stroke-miterlimit:10;}
+</style>
+<g id="图层_2">
+</g>
+<g id="图层_1">
+	<path class="st194" d="M24.51,28.51H5.49c-2.21,0-4-1.79-4-4V5.49c0-2.21,1.79-4,4-4h19.03c2.21,0,4,1.79,4,4v19.03   C28.51,26.72,26.72,28.51,24.51,28.51z"/>
+	<g>
+		<g>
+			<g>
+				<g>
+					<path class="st23" d="M15.47,7.1l-1.3,1.85c-0.2,0.29-0.54,0.47-0.9,0.47h-7.1V7.09C6.16,7.1,15.47,7.1,15.47,7.1z"/>
+					<polygon class="st23" points="24.3,7.1 13.14,22.91 5.7,22.91 16.86,7.1      "/>
+					<path class="st23" d="M14.53,22.91l1.31-1.86c0.2-0.29,0.54-0.47,0.9-0.47h7.09v2.33H14.53z"/>
+				</g>
+			</g>
+		</g>
+	</g>
+</g>
+</svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/grok.svg b/bench/dashboard/assets/logos/grok.svg
new file mode 100644
index 0000000..06ab179
--- /dev/null
+++ b/bench/dashboard/assets/logos/grok.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xml:space="preserve" fill="#ffff" viewBox="0 0 841.89 595.28"><path d="m557.09 211.99 8.31 326.37h66.56l8.32-445.18zM640.28 56.91H538.72L379.35 284.53l50.78 72.52zM201.61 538.36h101.56l50.79-72.52-50.79-72.53zM201.61 211.99l228.52 326.37h101.56L303.17 211.99z"/></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/kimi.svg b/bench/dashboard/assets/logos/kimi.svg
new file mode 100644
index 0000000..db43fce
--- /dev/null
+++ b/bench/dashboard/assets/logos/kimi.svg
@@ -0,0 +1 @@
+<svg viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg" fill-rule="evenodd" clip-rule="evenodd" stroke-linejoin="round" stroke-miterlimit="2"><path d="M503 114.333v280c0 60.711-49.29 110-110 110H113c-60.711 0-110-49.289-110-110v-280c0-60.71 49.289-110 110-110h280c60.71 0 110 49.29 110 110z"/><path d="M342.065 189.759c1.886-2.42 3.541-4.63 5.289-6.77.81-1.007.74-1.771-.046-2.824-7.58-9.965-8.298-21.028-3.935-32.254 3.275-8.448 10.52-12.406 19.373-13.25 5.52-.521 10.936.046 15.959 2.73 6.596 3.53 10.438 8.912 11.688 16.341.995 5.926.81 11.712-.868 17.452-2.974 10.161-10.277 15.427-20.287 16.758-8.31 1.11-16.734 1.25-25.113 1.817-.648.046-1.308 0-2.06 0z" fill="#027aff"/><path d="M321.512 144.254h-50.064l-39.637 90.384h-56.036v-89.99H131v232.868h44.787v-98.103h78.973c13.598 0 26.015-7.927 31.744-20.252v118.355h44.787v-98.103c0-23.342-18.239-42.97-41.523-44.671v-.116h-24.593a45.577 45.577 0 0026.884-24.534l29.453-65.838z" fill="#fff"/></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/minimax.svg b/bench/dashboard/assets/logos/minimax.svg
new file mode 100644
index 0000000..2a60bd4
--- /dev/null
+++ b/bench/dashboard/assets/logos/minimax.svg
@@ -0,0 +1 @@
+<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Minimax</title><defs><linearGradient id="lobe-icons-minimax-fill" x1="0%" x2="100.182%" y1="50.057%" y2="50.057%"><stop offset="0%" stop-color="#E2167E"></stop><stop offset="100%" stop-color="#FE603C"></stop></linearGradient></defs><path d="M16.278 2c1.156 0 2.093.927 2.093 2.07v12.501a.74.74 0 00.744.709.74.74 0 00.743-.709V9.099a2.06 2.06 0 012.071-2.049A2.06 2.06 0 0124 9.1v6.561a.649.649 0 01-.652.645.649.649 0 01-.653-.645V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v7.472a2.037 2.037 0 01-2.048 2.026 2.037 2.037 0 01-2.048-2.026v-12.5a.785.785 0 00-.788-.753.785.785 0 00-.789.752l-.001 15.904A2.037 2.037 0 0113.441 22a2.037 2.037 0 01-2.048-2.026V18.04c0-.356.292-.645.652-.645.36 0 .652.289.652.645v1.934c0 .263.142.506.372.638.23.131.514.131.744 0a.734.734 0 00.372-.638V4.07c0-1.143.937-2.07 2.093-2.07zm-5.674 0c1.156 0 2.093.927 2.093 2.07v11.523a.648.648 0 01-.652.645.648.648 0 01-.652-.645V4.07a.785.785 0 00-.789-.78.785.785 0 00-.789.78v14.013a2.06 2.06 0 01-2.07 2.048 2.06 2.06 0 01-2.071-2.048V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v3.8a2.06 2.06 0 01-2.071 2.049A2.06 2.06 0 010 12.9v-1.378c0-.357.292-.646.652-.646.36 0 .653.29.653.646V12.9c0 .418.343.757.766.757s.766-.339.766-.757V9.099a2.06 2.06 0 012.07-2.048 2.06 2.06 0 012.071 2.048v8.984c0 .419.343.758.767.758.423 0 .766-.339.766-.758V4.07c0-1.143.937-2.07 2.093-2.07z" fill="url(#lobe-icons-minimax-fill)" fill-rule="nonzero"></path></svg>
\ No newline at end of file
diff --git a/bench/dashboard/assets/logos/openai.svg b/bench/dashboard/assets/logos/openai.svg
new file mode 100644
index 0000000..b6d542d
--- /dev/null
+++ b/bench/dashboard/assets/logos/openai.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="256" height="260" preserveAspectRatio="xMidYMid" viewBox="0 0 256 260"><path fill="#fff" d="M239.184 106.203a64.716 64.716 0 0 0-5.576-53.103C219.452 28.459 191 15.784 163.213 21.74A65.586 65.586 0 0 0 52.096 45.22a64.716 64.716 0 0 0-43.23 31.36c-14.31 24.602-11.061 55.634 8.033 76.74a64.665 64.665 0 0 0 5.525 53.102c14.174 24.65 42.644 37.324 70.446 31.36a64.72 64.72 0 0 0 48.754 21.744c28.481.025 53.714-18.361 62.414-45.481a64.767 64.767 0 0 0 43.229-31.36c14.137-24.558 10.875-55.423-8.083-76.483Zm-97.56 136.338a48.397 48.397 0 0 1-31.105-11.255l1.535-.87 51.67-29.825a8.595 8.595 0 0 0 4.247-7.367v-72.85l21.845 12.636c.218.111.37.32.409.563v60.367c-.056 26.818-21.783 48.545-48.601 48.601Zm-104.466-44.61a48.345 48.345 0 0 1-5.781-32.589l1.534.921 51.722 29.826a8.339 8.339 0 0 0 8.441 0l63.181-36.425v25.221a.87.87 0 0 1-.358.665l-52.335 30.184c-23.257 13.398-52.97 5.431-66.404-17.803ZM23.549 85.38a48.499 48.499 0 0 1 25.58-21.333v61.39a8.288 8.288 0 0 0 4.195 7.316l62.874 36.272-21.845 12.636a.819.819 0 0 1-.767 0L41.353 151.53c-23.211-13.454-31.171-43.144-17.804-66.405v.256Zm179.466 41.695-63.08-36.63L161.73 77.86a.819.819 0 0 1 .768 0l52.233 30.184a48.6 48.6 0 0 1-7.316 87.635v-61.391a8.544 8.544 0 0 0-4.4-7.213Zm21.742-32.69-1.535-.922-51.619-30.081a8.39 8.39 0 0 0-8.492 0L99.98 99.808V74.587a.716.716 0 0 1 .307-.665l52.233-30.133a48.652 48.652 0 0 1 72.236 50.391v.205ZM88.061 139.097l-21.845-12.585a.87.87 0 0 1-.41-.614V65.685a48.652 48.652 0 0 1 79.757-37.346l-1.535.87-51.67 29.825a8.595 8.595 0 0 0-4.246 7.367l-.051 72.697Zm11.868-25.58 28.138-16.217 28.188 16.218v32.434l-28.086 16.218-28.188-16.218-.052-32.434Z"/></svg>
\ No newline at end of file
diff --git a/bench/dashboard/index.html b/bench/dashboard/index.html
new file mode 100644
index 0000000..edcf297
--- /dev/null
+++ b/bench/dashboard/index.html
@@ -0,0 +1,21 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Quipbench Dashboard</title>
+    <!-- Web fonts for the serif / sans / mono stacks declared in styles.css. -->
+    <link rel="preconnect" href="https://fonts.googleapis.com" />
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+    <link
+      href="https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=Inter:wght@400;500;600;700;900&family=JetBrains+Mono:wght@400;700&display=swap"
+      rel="stylesheet"
+    />
+    <link rel="stylesheet" href="./styles.css" />
+  </head>
+  <body>
+    <!-- Mount node for the dashboard UI (presumably populated by app.js; confirm there). -->
+    <div id="app" class="app"></div>
+    <!-- Snapshot script written by bench/export.ts; it assigns window.__QUIPBENCH_LATEST__. -->
+    <script src="../out/latest.js"></script>
+    <!-- Chart.js UMD bundle, pinned to 4.4.6 on jsDelivr. -->
+    <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.6/dist/chart.umd.min.js"></script>
+    <!-- Loaded last: classic scripts run in order, so the two scripts above have already executed. -->
+    <script src="./app.js"></script>
+  </body>
+</html>
diff --git a/bench/dashboard/styles.css b/bench/dashboard/styles.css
new file mode 100644
index 0000000..31a5699
--- /dev/null
+++ b/bench/dashboard/styles.css
@@ -0,0 +1,251 @@
+/* Global reset: drop default spacing and size every element by its border box. */
+* {
+  margin: 0;
+  padding: 0;
+  box-sizing: border-box;
+}
+
+/* Design tokens: dark palette plus the three font stacks used across the dashboard. */
+:root {
+  --bg: #050505;
+  --surface: #0a0a0a;
+  --surface-2: #111111;
+  --border: #212121;
+  --border-light: #2d2d2d;
+  --text: #ededed;
+  --text-dim: #a2a2a2;
+  --text-muted: #6a6a6a;
+  --accent: #d97757;
+  --mono: "JetBrains Mono", "SF Mono", monospace;
+  --sans: "Inter", -apple-system, sans-serif;
+  --serif: "DM Serif Display", Georgia, serif;
+}
+
+/* Page defaults: dark background, sans-serif text, at least one viewport tall. */
+body {
+  background: var(--bg);
+  color: var(--text);
+  font-family: var(--sans);
+  min-height: 100vh;
+  -webkit-font-smoothing: antialiased;
+}
+
+/* Class carried by the #app mount element in index.html. */
+.app {
+  min-height: 100vh;
+}
+
+/* Centered page column (max 1200px) that stacks its children vertically with a 20px gap. */
+.shell {
+  max-width: 1200px;
+  margin: 0 auto;
+  padding: 32px 20px 64px;
+  display: flex;
+  flex-direction: column;
+  gap: 20px;
+}
+
+/* Top bar: brand on one side, links on the other; wraps when space runs out. */
+.header {
+  display: flex;
+  flex-wrap: wrap;
+  justify-content: space-between;
+  gap: 12px;
+  align-items: center;
+}
+
+/* Logo image + title pair. */
+.brand {
+  display: inline-flex;
+  align-items: center;
+  gap: 12px;
+}
+
+.brand img {
+  width: 28px;
+  height: 28px;
+}
+
+/* Title scales with the viewport between 26px and 36px. */
+.brand h1 {
+  font-family: var(--serif);
+  font-size: clamp(26px, 3.5vw, 36px);
+  letter-spacing: -0.4px;
+}
+
+/* Pill-shaped outbound links shown in the header. */
+.header-links {
+  display: inline-flex;
+  align-items: center;
+  gap: 10px;
+}
+
+.header-links a {
+  color: var(--text-dim);
+  text-decoration: none;
+  border: 1px solid var(--border);
+  background: rgba(255, 255, 255, 0.02);
+  border-radius: 999px;
+  padding: 6px 10px;
+  font-family: var(--mono);
+  font-size: 11px;
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+}
+
+.header-links a:hover {
+  color: var(--text);
+  border-color: var(--border-light);
+}
+
+/* Inline icons inherit the link colour (including the hover colour) via currentColor. */
+.header-links a svg {
+  width: 13px;
+  height: 13px;
+  fill: currentColor;
+}
+
+/* Wrapping row of small metadata pills. */
+.meta-pills {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+}
+
+.pill {
+  border: 1px solid var(--border);
+  background: rgba(255, 255, 255, 0.02);
+  color: var(--text-dim);
+  border-radius: 999px;
+  padding: 6px 10px;
+  font-size: 11px;
+}
+
+/* Generic bordered card used for each dashboard section. */
+.panel {
+  border: 1px solid var(--border);
+  background: var(--surface);
+  border-radius: 12px;
+  padding: 18px;
+}
+
+/* Summary variant: four equal columns (reduced by the media queries at the end of the file). */
+.panel--summary {
+  display: grid;
+  grid-template-columns: repeat(4, minmax(0, 1fr));
+  gap: 12px;
+}
+
+/* min-width: 0 lets the grid item shrink below its content so the value can ellipsize. */
+.summary-block {
+  border: 1px solid var(--border);
+  background: var(--surface-2);
+  border-radius: 8px;
+  padding: 10px 12px;
+  min-width: 0;
+}
+
+.summary-label {
+  color: var(--text-muted);
+  font-size: 11px;
+  letter-spacing: 0.5px;
+  text-transform: uppercase;
+  margin-bottom: 6px;
+}
+
+/* Single-line value; overflow is cut off with an ellipsis. */
+.summary-value {
+  font-size: 14px;
+  color: var(--text);
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+/* Panel heading row: title and caption aligned on their text baseline. */
+.panel-head {
+  margin-bottom: 16px;
+  display: flex;
+  flex-wrap: wrap;
+  align-items: baseline;
+  justify-content: space-between;
+  gap: 8px;
+}
+
+.panel-head h2 {
+  font-size: 18px;
+  letter-spacing: -0.2px;
+}
+
+.panel-head p {
+  color: var(--text-muted);
+  font-size: 11px;
+}
+
+/* Chart frame: height follows the viewport width, capped at 420px and never below 280px. */
+.chart-shell {
+  height: min(46vw, 420px);
+  min-height: 280px;
+  border: 1px solid var(--border);
+  border-radius: 10px;
+  background: linear-gradient(to bottom, #121212, #0d0d0d);
+  padding: 12px;
+}
+
+/* Horizontal scroll container; needed because the table below has a 680px minimum width. */
+.table-wrap {
+  overflow-x: auto;
+}
+
+table {
+  width: 100%;
+  border-collapse: collapse;
+  min-width: 680px;
+}
+
+th,
+td {
+  text-align: left;
+  padding: 10px;
+  border-bottom: 1px solid var(--border);
+  font-size: 13px;
+}
+
+th {
+  color: var(--text-muted);
+  font-weight: 600;
+}
+
+/* Model name with a 16px logo in front of it. */
+.model-cell {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+}
+
+.model-cell img {
+  width: 16px;
+  height: 16px;
+}
+
+/* Narrow, dimmed rank column. */
+.rank {
+  color: var(--text-dim);
+  width: 36px;
+}
+
+/* Utility: switch an element to the monospace stack. */
+.mono {
+  font-family: var(--mono);
+}
+
+/* Muted monospace message (presumably the no-snapshot state; confirm in app.js). */
+.empty {
+  font-family: var(--mono);
+  font-size: 14px;
+  color: var(--text-muted);
+}
+
+/* Inline code chip. */
+code {
+  font-family: var(--mono);
+  font-size: 12px;
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  padding: 2px 6px;
+  background: var(--surface-2);
+}
+
+/* Up to 900px wide: summary grid drops from four columns to two. */
+@media (max-width: 900px) {
+  .panel--summary {
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+  }
+}
+
+/* Up to 560px wide: tighter page padding and a single-column summary grid. */
+@media (max-width: 560px) {
+  .shell {
+    padding: 22px 12px 40px;
+  }
+
+  .panel--summary {
+    grid-template-columns: 1fr;
+  }
+}
diff --git a/bench/db.ts b/bench/db.ts
new file mode 100644
index 0000000..d01b196
--- /dev/null
+++ b/bench/db.ts
@@ -0,0 +1,372 @@
+import { Database } from "bun:sqlite";
+import type { LeaderboardRow, MatchRecord, RunMeta } from "./types";
+
+/**
+ * Raw shape of one row of the `runs` table, using the snake_case column names
+ * exactly as they are stored in SQLite.
+ */
+export type RunRow = {
+  id: string;
+  started_at: string;
+  // Not written by insertRunStart; stays NULL until the run is finalized.
+  ended_at: string | null;
+  rounds_requested: number;
+  rounds_completed: number;
+  failures: number;
+  concurrency: number;
+  elo_k: number;
+  initial_elo: number;
+  seed: number;
+  // Inserted as 'running'; finalizeRun later sets 'completed' or 'failed'.
+  status: string;
+  output_dir: string;
+};
+
+/**
+ * Opens the benchmark SQLite database at `path` (passing `create: true` so a
+ * missing file is created) and makes sure the schema exists before returning
+ * the connection. The caller is responsible for closing it.
+ */
+export function openBenchDb(path: string): Database {
+  const connection = new Database(path, { create: true });
+  initSchema(connection);
+  return connection;
+}
+
+/**
+ * Creates the `runs`, `matches` and `ratings` tables and their lookup indexes.
+ * Every statement uses IF NOT EXISTS, so running this against an already
+ * initialized database is a no-op.
+ */
+function initSchema(db: Database) {
+  const schemaSql = `
+    CREATE TABLE IF NOT EXISTS runs (
+      id TEXT PRIMARY KEY,
+      started_at TEXT NOT NULL,
+      ended_at TEXT,
+      rounds_requested INTEGER NOT NULL,
+      rounds_completed INTEGER NOT NULL DEFAULT 0,
+      failures INTEGER NOT NULL DEFAULT 0,
+      concurrency INTEGER NOT NULL,
+      elo_k REAL NOT NULL,
+      initial_elo REAL NOT NULL,
+      seed INTEGER NOT NULL,
+      status TEXT NOT NULL,
+      output_dir TEXT NOT NULL
+    );
+
+    CREATE TABLE IF NOT EXISTS matches (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      run_id TEXT NOT NULL,
+      round_num INTEGER NOT NULL,
+      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+      prompter_id TEXT NOT NULL,
+      prompter_name TEXT NOT NULL,
+      contestant_a_id TEXT NOT NULL,
+      contestant_a_name TEXT NOT NULL,
+      contestant_b_id TEXT NOT NULL,
+      contestant_b_name TEXT NOT NULL,
+      prompt TEXT,
+      answer_a TEXT,
+      answer_b TEXT,
+      votes_a INTEGER NOT NULL DEFAULT 0,
+      votes_b INTEGER NOT NULL DEFAULT 0,
+      winner TEXT NOT NULL,
+      error TEXT,
+      payload_json TEXT,
+      FOREIGN KEY(run_id) REFERENCES runs(id)
+    );
+
+    CREATE TABLE IF NOT EXISTS ratings (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      run_id TEXT NOT NULL,
+      model_id TEXT NOT NULL,
+      model_name TEXT NOT NULL,
+      elo REAL NOT NULL,
+      wins INTEGER NOT NULL,
+      games INTEGER NOT NULL,
+      win_rate REAL NOT NULL,
+      rank INTEGER NOT NULL,
+      FOREIGN KEY(run_id) REFERENCES runs(id)
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_matches_run_id ON matches(run_id);
+    CREATE INDEX IF NOT EXISTS idx_ratings_run_id ON ratings(run_id);
+    CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at);
+  `;
+
+  db.exec(schemaSql);
+}
+
+/**
+ * Records the start of a benchmark run. The row is inserted with status
+ * 'running'; `ended_at` is not written, and `rounds_completed` / `failures`
+ * fall back to their column defaults of 0.
+ *
+ * @param db  Open benchmark database.
+ * @param row Run identity and configuration to persist.
+ */
+export function insertRunStart(
+  db: Database,
+  row: {
+    id: string;
+    startedAt: string;
+    roundsRequested: number;
+    concurrency: number;
+    eloK: number;
+    initialElo: number;
+    seed: number;
+    outputDir: string;
+  },
+) {
+  const insertSql = `
+    INSERT INTO runs (
+      id,
+      started_at,
+      rounds_requested,
+      concurrency,
+      elo_k,
+      initial_elo,
+      seed,
+      status,
+      output_dir
+    ) VALUES (
+      $id,
+      $started_at,
+      $rounds_requested,
+      $concurrency,
+      $elo_k,
+      $initial_elo,
+      $seed,
+      'running',
+      $output_dir
+    )
+  `;
+
+  const {
+    id,
+    startedAt,
+    roundsRequested,
+    concurrency,
+    eloK,
+    initialElo,
+    seed,
+    outputDir,
+  } = row;
+
+  db.prepare(insertSql).run({
+    $id: id,
+    $started_at: startedAt,
+    $rounds_requested: roundsRequested,
+    $concurrency: concurrency,
+    $elo_k: eloK,
+    $initial_elo: initialElo,
+    $seed: seed,
+    $output_dir: outputDir,
+  });
+}
+
+/**
+ * Overwrites the completed-round and failure counters stored for a run.
+ *
+ * @param db       Open benchmark database.
+ * @param runId    Id of the run to update.
+ * @param progress Absolute counter values (not deltas) to store.
+ */
+export function updateRunProgress(
+  db: Database,
+  runId: string,
+  progress: { roundsCompleted: number; failures: number },
+) {
+  const updateSql = `
+    UPDATE runs
+    SET rounds_completed = $rounds_completed,
+        failures = $failures
+    WHERE id = $id
+  `;
+  const { roundsCompleted, failures } = progress;
+
+  db.prepare(updateSql).run({
+    $id: runId,
+    $rounds_completed: roundsCompleted,
+    $failures: failures,
+  });
+}
+
+/**
+ * Marks a run as finished by storing its terminal status and end timestamp.
+ *
+ * @param db      Open benchmark database.
+ * @param runId   Id of the run to finalize.
+ * @param status  Terminal status to record.
+ * @param endedAt End timestamp, stored verbatim in `ended_at`.
+ */
+export function finalizeRun(
+  db: Database,
+  runId: string,
+  status: "completed" | "failed",
+  endedAt: string,
+) {
+  const finalizeSql = `
+    UPDATE runs
+    SET status = $status,
+        ended_at = $ended_at
+    WHERE id = $id
+  `;
+  const bindings = { $id: runId, $status: status, $ended_at: endedAt };
+
+  db.prepare(finalizeSql).run(bindings);
+}
+
+/**
+ * Persists one match (round) of a run. Optional text fields that are absent
+ * are stored as NULL, and the whole record is additionally serialized into
+ * `payload_json`.
+ */
+export function insertMatch(db: Database, match: MatchRecord) {
+  const insertSql = `
+    INSERT INTO matches (
+      run_id,
+      round_num,
+      prompter_id,
+      prompter_name,
+      contestant_a_id,
+      contestant_a_name,
+      contestant_b_id,
+      contestant_b_name,
+      prompt,
+      answer_a,
+      answer_b,
+      votes_a,
+      votes_b,
+      winner,
+      error,
+      payload_json
+    ) VALUES (
+      $run_id,
+      $round_num,
+      $prompter_id,
+      $prompter_name,
+      $contestant_a_id,
+      $contestant_a_name,
+      $contestant_b_id,
+      $contestant_b_name,
+      $prompt,
+      $answer_a,
+      $answer_b,
+      $votes_a,
+      $votes_b,
+      $winner,
+      $error,
+      $payload_json
+    )
+  `;
+
+  const { prompter, contestantA, contestantB } = match;
+
+  db.prepare(insertSql).run({
+    $run_id: match.runId,
+    $round_num: match.roundNum,
+    $prompter_id: prompter.id,
+    $prompter_name: prompter.name,
+    $contestant_a_id: contestantA.id,
+    $contestant_a_name: contestantA.name,
+    $contestant_b_id: contestantB.id,
+    $contestant_b_name: contestantB.name,
+    $prompt: match.prompt ?? null,
+    $answer_a: match.answerA ?? null,
+    $answer_b: match.answerB ?? null,
+    $votes_a: match.votesA,
+    $votes_b: match.votesB,
+    $winner: match.winner,
+    $error: match.error ?? null,
+    $payload_json: JSON.stringify(match),
+  });
+}
+
+/**
+ * Replaces every stored rating of `runId` with the given leaderboard rows.
+ *
+ * The delete of the previous rows and the inserts of the new ones run inside
+ * one transaction. If any insert throws, the delete is rolled back too, so the
+ * run never ends up with an empty or half-written ratings set.
+ *
+ * @param db          Open benchmark database.
+ * @param runId       Run whose ratings are replaced.
+ * @param leaderboard New rows to store; an empty array just clears the run.
+ */
+export function replaceRatings(
+  db: Database,
+  runId: string,
+  leaderboard: LeaderboardRow[],
+) {
+  const clearStmt = db.prepare("DELETE FROM ratings WHERE run_id = $run_id");
+
+  const stmt = db.prepare(`
+    INSERT INTO ratings (
+      run_id,
+      model_id,
+      model_name,
+      elo,
+      wins,
+      games,
+      win_rate,
+      rank
+    ) VALUES (
+      $run_id,
+      $model_id,
+      $model_name,
+      $elo,
+      $wins,
+      $games,
+      $win_rate,
+      $rank
+    )
+  `);
+
+  const tx = db.transaction((rows: LeaderboardRow[]) => {
+    // Clearing inside the transaction keeps delete + inserts atomic.
+    clearStmt.run({ $run_id: runId });
+
+    for (const row of rows) {
+      stmt.run({
+        $run_id: runId,
+        $model_id: row.modelId,
+        $model_name: row.modelName,
+        $elo: row.elo,
+        $wins: row.wins,
+        $games: row.games,
+        $win_rate: row.winRate,
+        $rank: row.rank,
+      });
+    }
+  });
+
+  tx(leaderboard);
+}
+
+/**
+ * Returns the id of the completed run with the most recent `ended_at`
+ * (ties broken by the most recent `started_at`), or `null` when no run has
+ * status 'completed'.
+ */
+export function getLatestCompletedRunId(db: Database): string | null {
+  const latestSql = `
+      SELECT id
+      FROM runs
+      WHERE status = 'completed'
+      ORDER BY datetime(ended_at) DESC, datetime(started_at) DESC
+      LIMIT 1
+    `;
+  const latest = db.query(latestSql).get() as { id: string } | null;
+
+  if (!latest) {
+    return null;
+  }
+  return latest.id ?? null;
+}
+
+/**
+ * Loads the raw `runs` row for `runId`.
+ *
+ * @returns The row with its snake_case columns, or `null` if no such run exists.
+ */
+export function getRunRow(db: Database, runId: string): RunRow | null {
+  const runSql = `
+      SELECT
+        id,
+        started_at,
+        ended_at,
+        rounds_requested,
+        rounds_completed,
+        failures,
+        concurrency,
+        elo_k,
+        initial_elo,
+        seed,
+        status,
+        output_dir
+      FROM runs
+      WHERE id = $run_id
+      LIMIT 1
+    `;
+  const found = db.query(runSql).get({ $run_id: runId }) as RunRow | null;
+
+  // Normalize a missing row to null regardless of how the driver reports it.
+  return found ?? null;
+}
+
+/**
+ * Reads the stored ratings of a run, ordered by ascending rank, and converts
+ * the snake_case columns into camelCase `LeaderboardRow` objects.
+ *
+ * @returns One entry per stored rating; empty when the run has none.
+ */
+export function getRatingsForRun(
+  db: Database,
+  runId: string,
+): LeaderboardRow[] {
+  type RatingRecord = {
+    rank: number;
+    model_id: string;
+    model_name: string;
+    elo: number;
+    wins: number;
+    games: number;
+    win_rate: number;
+  };
+
+  const ratingsSql = `
+      SELECT
+        rank,
+        model_id,
+        model_name,
+        elo,
+        wins,
+        games,
+        win_rate
+      FROM ratings
+      WHERE run_id = $run_id
+      ORDER BY rank ASC
+    `;
+  const records = db.query(ratingsSql).all({ $run_id: runId }) as RatingRecord[];
+
+  return records.map(
+    ({ rank, model_id, model_name, elo, wins, games, win_rate }) => ({
+      rank,
+      modelId: model_id,
+      modelName: model_name,
+      elo,
+      wins,
+      games,
+      winRate: win_rate,
+    }),
+  );
+}
+
+/**
+ * Converts a raw `runs` row into the camelCase `RunMeta` used in snapshots.
+ * `status` and `output_dir` are not carried over.
+ *
+ * @throws Error if the row has no `ended_at`, i.e. the run is not finalized.
+ */
+export function runRowToMeta(row: RunRow): RunMeta {
+  const { ended_at: endedAt } = row;
+  if (!endedAt) {
+    throw new Error(`Run ${row.id} is not finalized`);
+  }
+
+  // Property order is kept stable because this object is serialized as-is.
+  const meta: RunMeta = {
+    runId: row.id,
+    startedAt: row.started_at,
+    endedAt,
+    roundsRequested: row.rounds_requested,
+    roundsCompleted: row.rounds_completed,
+    failures: row.failures,
+    concurrency: row.concurrency,
+    eloK: row.elo_k,
+    initialElo: row.initial_elo,
+    seed: row.seed,
+  };
+  return meta;
+}
diff --git a/bench/elo.test.ts b/bench/elo.test.ts
new file mode 100644
index 0000000..080581b
--- /dev/null
+++ b/bench/elo.test.ts
@@ -0,0 +1,21 @@
+import { expect, test } from "bun:test";
+import { expectedScore, updatePairElo } from "./elo";
+
+// The two expected scores of a pairing must add up to 1, and the higher-rated
+// side must be the favourite.
+test("expected score is symmetric", () => {
+  const favourite = expectedScore(1600, 1500);
+  const underdog = expectedScore(1500, 1600);
+  const total = Number((favourite + underdog).toFixed(8));
+  expect(total).toBe(1);
+  expect(favourite).toBeGreaterThan(0.5);
+});
+
+// A decisive result between equally rated players moves them apart.
+test("winner gains rating and loser drops", () => {
+  const startingElo = 1500;
+  const updated = updatePairElo(startingElo, startingElo, 1, 24);
+  expect(updated.nextA).toBeGreaterThan(1500);
+  expect(updated.nextB).toBeLessThan(1500);
+});
+
+// A draw costs the higher-rated player points and rewards the lower-rated one.
+test("tie moves ratings toward each other", () => {
+  const updated = updatePairElo(1700, 1500, 0.5, 24);
+  expect(updated.nextA).toBeLessThan(1700);
+  expect(updated.nextB).toBeGreaterThan(1500);
+});
diff --git a/bench/elo.ts b/bench/elo.ts
new file mode 100644
index 0000000..f20dd3f
--- /dev/null
+++ b/bench/elo.ts
@@ -0,0 +1,26 @@
+/**
+ * Elo expected score of a player against an opponent, on the logistic curve
+ * with a 400-point scale.
+ *
+ * @returns A value in (0, 1); exactly 0.5 when both ratings are equal.
+ */
+export function expectedScore(playerElo: number, opponentElo: number): number {
+  const ratingGap = opponentElo - playerElo;
+  const odds = 10 ** (ratingGap / 400);
+  return 1 / (1 + odds);
+}
+
+/**
+ * Rating of a player after one game.
+ *
+ * @param playerElo   Player's rating before the game.
+ * @param opponentElo Opponent's rating before the game.
+ * @param actualScore Result from the player's side (the callers in this
+ *                    project pass 1 for a win, 0.5 for a tie, 0 for a loss).
+ * @param k           K-factor scaling how far one result moves the rating.
+ */
+export function nextElo(
+  playerElo: number,
+  opponentElo: number,
+  actualScore: number,
+  k: number,
+): number {
+  // How much better (positive) or worse (negative) the result was than predicted.
+  const surprise = actualScore - expectedScore(playerElo, opponentElo);
+  return playerElo + k * surprise;
+}
+
+/**
+ * Applies one game result to both participants at once.
+ *
+ * @param eloA    Rating of player A before the game.
+ * @param eloB    Rating of player B before the game.
+ * @param actualA Score of player A; player B is credited with `1 - actualA`.
+ * @param k       K-factor applied to both updates.
+ * @returns Both new ratings, each computed from the pre-game ratings.
+ */
+export function updatePairElo(
+  eloA: number,
+  eloB: number,
+  actualA: number,
+  k: number,
+): { nextA: number; nextB: number } {
+  const nextA = nextElo(eloA, eloB, actualA, k);
+  const nextB = nextElo(eloB, eloA, 1 - actualA, k);
+  return { nextA, nextB };
+}
diff --git a/bench/export.ts b/bench/export.ts
new file mode 100644
index 0000000..394bc51
--- /dev/null
+++ b/bench/export.ts
@@ -0,0 +1,96 @@
+import { mkdirSync, writeFileSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { DEFAULT_DB_PATH, DEFAULT_OUTPUT_DIR } from "./config";
+import {
+  getLatestCompletedRunId,
+  getRatingsForRun,
+  getRunRow,
+  openBenchDb,
+  runRowToMeta,
+} from "./db";
+import type { QuipbenchSnapshot } from "./types";
+
+/**
+ * Parses `--key=value` style CLI flags into a string map.
+ *
+ * - Arguments that do not start with `--` are ignored.
+ * - A flag without `=` is stored as the string "true".
+ * - Only the first `=` separates key from value, so values may contain `=`.
+ * - Flags with an empty key (`--`, `--=x`) are ignored.
+ * - When a key repeats, the last occurrence wins.
+ */
+function parseArgs(argv: string[]) {
+  const parsed: Record<string, string> = {};
+
+  for (const token of argv) {
+    if (token.startsWith("--")) {
+      const body = token.slice(2);
+      const eqIndex = body.indexOf("=");
+      const hasValue = eqIndex !== -1;
+      const key = hasValue ? body.slice(0, eqIndex) : body;
+
+      if (key !== "") {
+        parsed[key] = hasValue ? body.slice(eqIndex + 1) : "true";
+      }
+    }
+  }
+
+  return parsed;
+}
+
+/**
+ * Writes the snapshot of one run to `<outputDir>/latest.json` and
+ * `<outputDir>/latest.js`. The `.js` file assigns the same JSON to
+ * `window.__QUIPBENCH_LATEST__`.
+ *
+ * @param options.dbPath    Database file; defaults to DEFAULT_DB_PATH.
+ * @param options.outputDir Target directory (created if missing); defaults to
+ *                          DEFAULT_OUTPUT_DIR.
+ * @param options.runId     Run to export; defaults to the latest completed run.
+ * @returns The snapshot plus the two file paths that were written.
+ * @throws Error when there is no run to export, the run id is unknown, the run
+ *         has no ratings, or the run is not finalized.
+ */
+export async function exportLatestSnapshot(options?: {
+  dbPath?: string;
+  outputDir?: string;
+  runId?: string;
+}): Promise<{
+  snapshot: QuipbenchSnapshot;
+  latestJsonPath: string;
+  latestJsPath: string;
+}> {
+  const outputDir = options?.outputDir ?? DEFAULT_OUTPUT_DIR;
+  mkdirSync(outputDir, { recursive: true });
+
+  const db = openBenchDb(options?.dbPath ?? DEFAULT_DB_PATH);
+  try {
+    const runId = options?.runId ?? getLatestCompletedRunId(db);
+    if (!runId) {
+      throw new Error("No completed Quipbench runs found");
+    }
+
+    const runRow = getRunRow(db, runId);
+    if (!runRow) {
+      throw new Error(`Run not found: ${runId}`);
+    }
+
+    const leaderboard = getRatingsForRun(db, runId);
+    if (leaderboard.length === 0) {
+      throw new Error(`Run ${runId} has no ratings`);
+    }
+
+    const chart = leaderboard.map(({ modelName, elo }) => ({ modelName, elo }));
+    const snapshot: QuipbenchSnapshot = {
+      runMeta: runRowToMeta(runRow),
+      leaderboard,
+      chart,
+    };
+
+    // Both files carry the same pretty-printed JSON, so serialize it once.
+    const serialized = JSON.stringify(snapshot, null, 2);
+    const latestJsonPath = join(outputDir, "latest.json");
+    const latestJsPath = join(outputDir, "latest.js");
+
+    writeFileSync(latestJsonPath, serialized);
+    writeFileSync(latestJsPath, `window.__QUIPBENCH_LATEST__ = ${serialized};\n`);
+
+    return { snapshot, latestJsonPath, latestJsPath };
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * CLI entry point for the export command.
+ *
+ * Recognized flags: `--db=<path>`, `--out=<dir>`, `--runId=<id>`. Paths are
+ * resolved against the current working directory, so both relative and
+ * absolute values work. Omitted flags fall back to the defaults from
+ * `./config` and to the latest completed run.
+ */
+async function main() {
+  const args = parseArgs(process.argv.slice(2));
+  // resolve() returns an absolute argument unchanged, whereas join() would
+  // nest it under the working directory (e.g. /cwd/tmp/x for --db=/tmp/x).
+  const dbPath = args.db ? resolve(process.cwd(), args.db) : DEFAULT_DB_PATH;
+  const outputDir = args.out
+    ? resolve(process.cwd(), args.out)
+    : DEFAULT_OUTPUT_DIR;
+  const runId = args.runId;
+
+  const result = await exportLatestSnapshot({ dbPath, outputDir, runId });
+  console.log(`Exported snapshot: ${result.latestJsonPath}`);
+  console.log(`Exported snapshot script: ${result.latestJsPath}`);
+}
+
+// Run the CLI only when this module is the entry script, not when it is
+// imported (finalize-partial.ts imports exportLatestSnapshot from here).
+if (import.meta.main) {
+  const failWithMessage = (error: unknown) => {
+    const message = error instanceof Error ? error.message : String(error);
+    console.error(message);
+    process.exit(1);
+  };
+
+  main().catch(failWithMessage);
+}
diff --git a/bench/finalize-partial.ts b/bench/finalize-partial.ts
new file mode 100644
index 0000000..9a4e46c
--- /dev/null
+++ b/bench/finalize-partial.ts
@@ -0,0 +1,105 @@
+import { QUIPBENCH_MODELS } from "./models";
+import { updatePairElo } from "./elo";
+import { buildLeaderboard } from "./leaderboard";
+import { openBenchDb, replaceRatings } from "./db";
+import { DEFAULT_DB_PATH, DEFAULT_OUTPUT_DIR } from "./config";
+import { exportLatestSnapshot } from "./export";
+import type { RatingState } from "./types";
+
+// Recovery script for an interrupted benchmark. It picks the most recently
+// started run still marked 'running', rebuilds its ratings by replaying the
+// stored matches in order, marks the run completed and exports its snapshot.
+const db = openBenchDb(DEFAULT_DB_PATH);
+
+try {
+  type PendingRun = {
+    id: string;
+    initial_elo: number;
+    elo_k: number;
+    rounds_requested: number;
+  };
+  type StoredMatch = {
+    contestant_a_id: string;
+    contestant_b_id: string;
+    winner: "A" | "B" | "TIE" | "ERROR";
+  };
+
+  const run = db.query(`
+    SELECT id, initial_elo, elo_k, rounds_requested
+    FROM runs
+    WHERE status = 'running'
+    ORDER BY datetime(started_at) DESC
+    LIMIT 1
+  `).get() as PendingRun | null;
+
+  if (!run) {
+    throw new Error("No running Quipbench run found to finalize");
+  }
+
+  const { id: runId, initial_elo: startingElo, elo_k: kFactor } = run;
+
+  // Every roster model starts from the run's initial rating with no games.
+  const ratings = new Map<string, RatingState>(
+    QUIPBENCH_MODELS.map((model): [string, RatingState] => [
+      model.id,
+      { model, elo: startingElo, wins: 0, games: 0 },
+    ]),
+  );
+
+  const matches = db.query(`
+    SELECT contestant_a_id, contestant_b_id, winner
+    FROM matches
+    WHERE run_id = $run_id
+    ORDER BY round_num ASC, id ASC
+  `).all({ $run_id: runId }) as StoredMatch[];
+
+  let completed = 0;
+  let failures = 0;
+
+  for (const { contestant_a_id, contestant_b_id, winner } of matches) {
+    const sideA = ratings.get(contestant_a_id);
+    const sideB = ratings.get(contestant_b_id);
+    // Matches involving a model outside the current roster are ignored
+    // entirely: they count neither as completed nor as failed.
+    if (sideA === undefined || sideB === undefined) continue;
+
+    if (winner === "ERROR") {
+      failures += 1;
+      continue;
+    }
+
+    completed += 1;
+    sideA.games += 1;
+    sideB.games += 1;
+
+    // Score from A's point of view; anything other than an A/B win is a draw.
+    let scoreA: number;
+    switch (winner) {
+      case "A":
+        scoreA = 1;
+        sideA.wins += 1;
+        break;
+      case "B":
+        scoreA = 0;
+        sideB.wins += 1;
+        break;
+      default:
+        scoreA = 0.5;
+    }
+
+    const { nextA, nextB } = updatePairElo(sideA.elo, sideB.elo, scoreA, kFactor);
+    sideA.elo = nextA;
+    sideB.elo = nextB;
+  }
+
+  const leaderboard = buildLeaderboard([...ratings.values()]);
+  replaceRatings(db, runId, leaderboard);
+
+  const completeRun = db.prepare(`
+    UPDATE runs
+    SET status = 'completed',
+        ended_at = $ended_at,
+        rounds_completed = $rounds_completed,
+        failures = $failures
+    WHERE id = $run_id
+  `);
+  completeRun.run({
+    $run_id: runId,
+    $ended_at: new Date().toISOString(),
+    $rounds_completed: completed,
+    $failures: failures,
+  });
+
+  const out = await exportLatestSnapshot({
+    dbPath: DEFAULT_DB_PATH,
+    outputDir: DEFAULT_OUTPUT_DIR,
+    runId,
+  });
+
+  console.log(`Finalized partial run: ${runId}`);
+  console.log(`Completed rounds: ${completed}`);
+  console.log(`Failures: ${failures}`);
+  console.log(`Snapshot JS: ${out.latestJsPath}`);
+} finally {
+  db.close();
+}
diff --git a/bench/integration.test.ts b/bench/integration.test.ts
new file mode 100644
index 0000000..bbe31cb
--- /dev/null
+++ b/bench/integration.test.ts
@@ -0,0 +1,120 @@
+import { expect, test } from "bun:test";
+import { mkdtempSync, readFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { Database } from "bun:sqlite";
+import { runQuipbench } from "./run";
+import type { BenchModel, QuipbenchAi } from "./types";
+
+// Four-model roster used by both integration tests.
+const TEST_MODELS: BenchModel[] = [
+  { id: "model/a", name: "Model A" },
+  { id: "model/b", name: "Model B" },
+  { id: "model/c", name: "Model C" },
+  { id: "model/d", name: "Model D" },
+];
+
+// Deterministic, offline stand-in for the AI client: prompts and answers are
+// derived from the model name, and the vote goes to the longer answer (A on a
+// length tie).
+const STUB_AI: QuipbenchAi = {
+  generatePrompt: async (model) => `Prompt by ${model.name}`,
+  generateAnswer: async (model, prompt) => `${model.name} answer for ${prompt}`,
+  vote: async (_voter, _prompt, answerA, answerB) =>
+    answerB.length > answerA.length ? "B" : "A",
+};
+
+// Happy path: a stubbed run must leave one run row, one match row per round,
+// one rating row per model, and a latest.json that agrees with the result.
+test("run writes DB rows and latest snapshot", async () => {
+  const workDir = mkdtempSync(join(tmpdir(), "quipbench-it-"));
+  const dbPath = join(workDir, "bench.sqlite");
+  const outputDir = join(workDir, "out");
+
+  const result = await runQuipbench({
+    rounds: 8,
+    concurrency: 2,
+    seed: 123,
+    models: TEST_MODELS,
+    ai: STUB_AI,
+    dbPath,
+    outputDir,
+  });
+  const { runMeta } = result;
+
+  expect(runMeta.roundsRequested).toBe(8);
+  expect(runMeta.roundsCompleted + runMeta.failures).toBe(8);
+  expect(result.leaderboard.length).toBe(TEST_MODELS.length);
+
+  type CountRow = { c: number };
+  const forThisRun = { $run_id: runMeta.runId };
+
+  const db = new Database(dbPath);
+  const runsRow = db.query("SELECT COUNT(*) as c FROM runs").get() as CountRow;
+  const matchesRow = db
+    .query("SELECT COUNT(*) as c FROM matches WHERE run_id = $run_id")
+    .get(forThisRun) as CountRow;
+  const ratingsRow = db
+    .query("SELECT COUNT(*) as c FROM ratings WHERE run_id = $run_id")
+    .get(forThisRun) as CountRow;
+  db.close();
+
+  expect(runsRow.c).toBe(1);
+  expect(matchesRow.c).toBe(8);
+  expect(ratingsRow.c).toBe(TEST_MODELS.length);
+
+  const snapshotText = readFileSync(result.snapshotPathJson, "utf8");
+  const latest = JSON.parse(snapshotText) as {
+    runMeta: { runId: string };
+    leaderboard: unknown[];
+    chart: unknown[];
+  };
+
+  expect(latest.runMeta.runId).toBe(runMeta.runId);
+  expect(latest.leaderboard.length).toBe(TEST_MODELS.length);
+  expect(latest.chart.length).toBe(TEST_MODELS.length);
+});
+
+// Failure path: when prompt generation always throws, every round must be
+// stored as an ERROR match and the run must still produce a snapshot.
+test("round failures are persisted and run still finalizes", async () => {
+  const workDir = mkdtempSync(join(tmpdir(), "quipbench-it-fail-"));
+  const dbPath = join(workDir, "bench.sqlite");
+  const outputDir = join(workDir, "out");
+
+  const failingAi: QuipbenchAi = {
+    generatePrompt: async () => {
+      throw new Error("forced prompt failure");
+    },
+    generateAnswer: async () => "unused",
+    vote: async () => "A",
+  };
+
+  const result = await runQuipbench({
+    rounds: 3,
+    concurrency: 2,
+    seed: 7,
+    models: TEST_MODELS,
+    ai: failingAi,
+    dbPath,
+    outputDir,
+  });
+  const { runMeta } = result;
+
+  expect(runMeta.roundsCompleted).toBe(0);
+  expect(runMeta.failures).toBe(3);
+
+  const db = new Database(dbPath);
+  const errorRow = db
+    .query("SELECT COUNT(*) as c FROM matches WHERE run_id = $run_id AND winner = 'ERROR'")
+    .get({ $run_id: runMeta.runId }) as { c: number };
+  db.close();
+
+  expect(errorRow.c).toBe(3);
+
+  const snapshotText = readFileSync(result.snapshotPathJson, "utf8");
+  const latest = JSON.parse(snapshotText) as {
+    runMeta: { failures: number };
+  };
+  expect(latest.runMeta.failures).toBe(3);
+});
diff --git a/bench/leaderboard.test.ts b/bench/leaderboard.test.ts
new file mode 100644
index 0000000..27cfa0b
--- /dev/null
+++ b/bench/leaderboard.test.ts
@@ -0,0 +1,25 @@
+import { expect, test } from "bun:test";
+import { buildLeaderboard } from "./leaderboard";
+
+// Shorthand for building the `model` part of a rating entry.
+function model(id: string, name: string) {
+  return { id, name };
+}
+
+// Covers every tie-breaker level: Beta wins on Elo; the four 1500-rated
+// models are then split by wins, win rate and finally name.
+test("leaderboard sorts by elo, then wins, then win rate, then name", () => {
+  const input = [
+    { model: model("a", "Alpha"), elo: 1500, wins: 5, games: 10 },
+    { model: model("b", "Beta"), elo: 1600, wins: 1, games: 1 },
+    { model: model("c", "Charlie"), elo: 1500, wins: 6, games: 12 },
+    { model: model("d", "Delta"), elo: 1500, wins: 6, games: 8 },
+    { model: model("e", "Echo"), elo: 1500, wins: 6, games: 8 },
+  ];
+  const rows = buildLeaderboard(input);
+
+  const orderedNames = rows.map((row) => row.modelName);
+  expect(orderedNames).toEqual(["Beta", "Delta", "Echo", "Charlie", "Alpha"]);
+  expect(rows[0]?.rank).toBe(1);
+  expect(rows[4]?.rank).toBe(5);
+  expect(rows[1]?.winRate).toBe(75);
+});
diff --git a/bench/leaderboard.ts b/bench/leaderboard.ts
new file mode 100644
index 0000000..6e73f81
--- /dev/null
+++ b/bench/leaderboard.ts
@@ -0,0 +1,25 @@
+import type { LeaderboardRow, RatingState } from "./types";
+
+export function buildLeaderboard(rows: RatingState[]): LeaderboardRow[] {
+  // Fraction of games won; a model with no games counts as 0.
+  const winFraction = (s: RatingState) => (s.games > 0 ? s.wins / s.games : 0);
+  // Round to two decimals and convert back to a number.
+  const round2 = (value: number) => Number(value.toFixed(2));
+
+  // Order: higher Elo, then more wins, then higher win rate, then name ascending. The input is not mutated.
+  const ranked = Array.from(rows).sort((left, right) => {
+    if (left.elo !== right.elo) return right.elo - left.elo;
+    if (left.wins !== right.wins) return right.wins - left.wins;
+    const rateLeft = winFraction(left);
+    const rateRight = winFraction(right);
+    if (rateLeft !== rateRight) return rateRight - rateLeft;
+    return left.model.name.localeCompare(right.model.name);
+  });
+
+  // Ranks are 1-based positions in the sorted order; win rate is reported as a percentage.
+  return ranked.map(({ model, elo, wins, games }, position) => {
+    const winRate = games > 0 ? round2((wins / games) * 100) : 0;
+    const identity = { rank: position + 1, modelId: model.id, modelName: model.name };
+    return { ...identity, elo: round2(elo), wins, games, winRate };
+  });
+}
diff --git a/bench/models.ts b/bench/models.ts
new file mode 100644
index 0000000..1b4115d
--- /dev/null
+++ b/bench/models.ts
@@ -0,0 +1,12 @@
+import type { BenchModel } from "./types";
+
+// Quipbench source-of-truth model roster: `id` is the provider-qualified model id (presumably an OpenRouter slug — confirm), `name` the display label.
+export const QUIPBENCH_MODELS: BenchModel[] = [
+  { id: "google/gemini-3.1-pro-preview", name: "Gemini 3.1 Pro" },
+  { id: "moonshotai/kimi-k2", name: "Kimi K2" },
+  { id: "deepseek/deepseek-v3.2", name: "DeepSeek 3.2" },
+  { id: "openai/gpt-5.2", name: "GPT-5.2" },
+  { id: "anthropic/claude-opus-4.6", name: "Opus 4.6" },
+  { id: "anthropic/claude-sonnet-4.6", name: "Sonnet 4.6" },
+  { id: "x-ai/grok-4.1-fast", name: "Grok 4.1" },
+];
diff --git a/bench/open.ts b/bench/open.ts
new file mode 100644
index 0000000..4f14248
--- /dev/null
+++ b/bench/open.ts
@@ -0,0 +1,30 @@
+import { spawnSync } from "node:child_process";
+import { resolve } from "node:path";
+
+const dashboardPath = resolve(import.meta.dir, "dashboard", "index.html"); // absolute path to dashboard/index.html next to this script
+
+function openPath(target: string) {
+  // Launch the platform's "open" command for `target` and return the spawnSync result.
+  switch (process.platform) {
+    case "darwin":
+      return spawnSync("open", [target], { stdio: "inherit" });
+    case "win32": {
+      // The empty string presumably fills `start`'s window-title slot so `target` is not read as the title — confirm.
+      const startArgs = ["/c", "start", "", target];
+      return spawnSync("cmd", startArgs, { stdio: "inherit", shell: false });
+    }
+    default:
+      return spawnSync("xdg-open", [target], { stdio: "inherit" });
+  }
+}
+
+const result = openPath(dashboardPath);
+if (result.error) { // spawnSync reported an error object for the launcher process
+  console.error(`Could not open dashboard: ${result.error.message}`);
+  process.exit(1);
+}
+if (typeof result.status === "number" && result.status !== 0) { // launcher exited non-zero: pass its exit code on
+  process.exit(result.status);
+}
+
+console.log(`Opened Quipbench dashboard: ${dashboardPath}`);
diff --git a/bench/run.ts b/bench/run.ts
new file mode 100644
index 0000000..0e1e9cd
--- /dev/null
+++ b/bench/run.ts
@@ -0,0 +1,525 @@
+import { mkdirSync } from "node:fs";
+import { isAbsolute, join } from "node:path";
+import {
+  DEFAULT_CONCURRENCY,
+  DEFAULT_DB_PATH,
+  DEFAULT_ELO_K,
+  DEFAULT_INITIAL_ELO,
+  DEFAULT_OUTPUT_DIR,
+  DEFAULT_ROUNDS,
+  parsePositiveInt,
+  parsePositiveNumber,
+} from "./config";
+import { QUIPBENCH_MODELS } from "./models";
+import type {
+  BenchModel,
+  MatchRecord,
+  QuipbenchAi,
+  QuipbenchRunResult,
+  RatingState,
+  VoteRecord,
+} from "./types";
+import { updatePairElo } from "./elo";
+import { buildLeaderboard } from "./leaderboard";
+import {
+  finalizeRun,
+  insertMatch,
+  insertRunStart,
+  openBenchDb,
+  replaceRatings,
+  updateRunProgress,
+} from "./db";
+import { exportLatestSnapshot } from "./export";
+
+type RetryFn = <T>( // Shape of a retry helper: resolves with a result of `fn` that `validate` accepts.
+  fn: () => Promise<T>, // the operation to attempt
+  validate: (result: T) => boolean, // returns true when a result is acceptable
+  retries: number, // maximum number of attempts
+  label: string, // identifies the call in error messages
+) => Promise<T>;
+
+function defaultIsRealString(value: string, minLength = 5): boolean {
+  return minLength <= value.trim().length; // true when the whitespace-trimmed text has at least `minLength` characters
+}
+
+const defaultWithRetry: RetryFn = async (fn, validate, retries, label) => {
+  // Fallback retry loop: up to `retries` attempts with a linearly growing pause between them.
+  const pause = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
+  let failure: unknown;
+
+  for (let attempt = 1; attempt <= retries; attempt += 1) {
+    try {
+      const outcome = await fn();
+      if (validate(outcome)) return outcome;
+      failure = new Error(`${label}: validation failed`);
+    } catch (caught) {
+      failure = caught; // a throw from `fn` or `validate` is remembered for the final throw
+    }
+
+    const isLastAttempt = attempt >= retries;
+    if (!isLastAttempt) await pause(250 * attempt); // 250 ms, 500 ms, ... between attempts
+  }
+
+  if (failure instanceof Error) throw failure;
+  throw new Error(`${label}: all retry attempts failed`);
+};
+
+async function loadLiveAi(): Promise<{
+  ai: QuipbenchAi;
+  retry: RetryFn;
+  isRealStringFn: (value: string, minLength?: number) => boolean;
+}> {
+  // Dynamic import: the game module is loaded only when this function runs.
+  const game = await import("../game.ts");
+  type GameModel = typeof game.MODELS[number];
+
+  // The cast bridges the bench and game model type declarations; the object itself is passed through unchanged.
+  const asGameModel = (model: BenchModel): GameModel => model as unknown as GameModel;
+
+  // Adapter exposing the game's prompt/answer/vote calls through the QuipbenchAi interface.
+  const ai: QuipbenchAi = {
+    generatePrompt: async (model) =>
+      game.callGeneratePrompt(asGameModel(model)),
+    generateAnswer: async (model, prompt) =>
+      game.callGenerateAnswer(asGameModel(model), prompt),
+    vote: async (voter, prompt, answerA, answerB) => {
+      // Each answer is handed to callVote wrapped in an object with an `answer` field.
+      const first = { answer: answerA };
+      const second = { answer: answerB };
+      return game.callVote(asGameModel(voter), prompt, first, second);
+    },
+  };
+
+  // NOTE(review): withRetry is cast to RetryFn; its declared signature lives in game.ts — confirm they agree.
+  return {
+    ai,
+    retry: game.withRetry as RetryFn,
+    isRealStringFn: game.isRealString,
+  };
+}
+
+function mulberry32(seed: number): () => number { // seeded PRNG: returns a generator of floats in [0, 1)
+  let state = seed >>> 0; // coerce the seed to an unsigned 32-bit integer
+  return () => {
+    state = (state + 0x6d2b79f5) >>> 0; // wrap to 32 bits each step so the counter never outgrows exact double precision
+    let r = Math.imul(state ^ (state >>> 15), state | 1);
+    r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
+    return ((r ^ (r >>> 14)) >>> 0) / 4294967296; // scale the uint32 result into [0, 1)
+  };
+}
+
+function roundRng(seed: number, roundNum: number): () => number {
+  const roundSalt = Math.imul(roundNum, 0x9e3779b1); // 32-bit multiplicative mix of the round number
+  return mulberry32((seed ^ roundSalt) >>> 0); // each round gets its own generator derived from the run seed
+}
+
+function shuffleWithRng<T>(items: T[], rng: () => number): T[] {
+  const deck = Array.from(items); // shuffle a copy; the caller's array is left untouched
+  for (let last = deck.length - 1; last >= 1; last -= 1) { // Fisher–Yates, walking from the tail
+    const pick = Math.floor(rng() * (last + 1)); // random slot in [0, last]
+    [deck[pick], deck[last]] = [deck[last]!, deck[pick]!];
+  }
+  return deck;
+}
+
+function parseArgs(argv: string[]) {
+  const parsed: Record<string, string> = {};
+  for (const token of argv.filter((entry) => entry.startsWith("--"))) { // only `--key` / `--key=value` tokens count
+    const body = token.slice(2);
+    const eq = body.indexOf("="); // the first "=" splits key from value; later ones stay in the value
+    const key = eq === -1 ? body : body.slice(0, eq);
+    if (key.length === 0) continue;
+    parsed[key] = eq === -1 ? "true" : body.slice(eq + 1); // a bare flag is stored as the string "true"
+  }
+  return parsed;
+}
+
+function createRunId(): string {
+  return ["quipbench", Date.now(), Math.random().toString(36).slice(2, 8)].join("-"); // prefix, ms timestamp, random base-36 suffix
+}
+
+type RoundRunResult = { // What runRound resolves with.
+  match: MatchRecord; // the round's record; winner is "ERROR" when prompt or answer generation failed
+};
+
+async function runRound(options: { // Plays one round (prompt → two answers → votes) and resolves with its MatchRecord; prompt/answer failures give winner "ERROR" instead of throwing.
+  runId: string;
+  roundNum: number;
+  models: BenchModel[];
+  ai: QuipbenchAi;
+  seed: number;
+  retry: RetryFn;
+  isRealStringFn: (value: string, minLength?: number) => boolean;
+}): Promise<RoundRunResult> {
+  const { runId, roundNum, models, ai, seed, retry, isRealStringFn } = options;
+  const rng = roundRng(seed, roundNum); // role assignment and answer ordering derive from (seed, roundNum)
+  const shuffled = shuffleWithRng(models, rng);
+
+  const prompter = shuffled[0]!; // roles come from shuffle order: 0 = prompter, 1 and 2 = contestants
+  const contestantA = shuffled[1]!;
+  const contestantB = shuffled[2]!;
+  const voters = [prompter, ...shuffled.slice(3)]; // every model except the two contestants votes, prompter included
+
+  const baseMatch: MatchRecord = { // template record; starts as "ERROR" and is overridden on success
+    runId,
+    roundNum,
+    prompter,
+    contestantA,
+    contestantB,
+    votesA: 0,
+    votesB: 0,
+    winner: "ERROR",
+    votes: [],
+  };
+
+  let prompt = "";
+  try {
+    prompt = await retry(
+      () => ai.generatePrompt(prompter),
+      (value) => isRealStringFn(value, 10), // prompts are validated with a minimum length of 10
+      3,
+      `QB:R${roundNum}:prompt:${prompter.name}`,
+    );
+  } catch (error) {
+    return { // no prompt: the round is recorded as an error without a prompt field
+      match: {
+        ...baseMatch,
+        winner: "ERROR",
+        error:
+          error instanceof Error
+            ? `Prompt failed: ${error.message}`
+            : "Prompt failed",
+      },
+    };
+  }
+
+  let answerA = "";
+  let answerB = "";
+  try {
+    [answerA, answerB] = await Promise.all([ // both contestants answer concurrently; either failure fails the round
+      retry(
+        () => ai.generateAnswer(contestantA, prompt),
+        (value) => isRealStringFn(value, 3), // answers are validated with a minimum length of 3
+        3,
+        `QB:R${roundNum}:answer:${contestantA.name}`,
+      ),
+      retry(
+        () => ai.generateAnswer(contestantB, prompt),
+        (value) => isRealStringFn(value, 3),
+        3,
+        `QB:R${roundNum}:answer:${contestantB.name}`,
+      ),
+    ]);
+  } catch (error) {
+    return { // the error record keeps the prompt that was generated
+      match: {
+        ...baseMatch,
+        prompt,
+        winner: "ERROR",
+        error:
+          error instanceof Error
+            ? `Answer failed: ${error.message}`
+            : "Answer failed",
+      },
+    };
+  }
+
+  const votes: VoteRecord[] = [];
+  let votesA = 0;
+  let votesB = 0;
+
+  await Promise.all(
+    voters.map(async (voter) => {
+      const showAFirst = rng() > 0.5; // drawn before the first await, so rng draws happen in voter order
+      const first = showAFirst ? answerA : answerB;
+      const second = showAFirst ? answerB : answerA;
+
+      try {
+        const decision = await retry(
+          () => ai.vote(voter, prompt, first, second),
+          (value) => value === "A" || value === "B",
+          3,
+          `QB:R${roundNum}:vote:${voter.name}`,
+        );
+
+        const votedForA = showAFirst ? decision === "A" : decision === "B"; // map the presentation slot back to the contestant
+        if (votedForA) votesA += 1;
+        else votesB += 1;
+
+        votes.push({ // appended after the await, i.e. in the order votes settle rather than voter order
+          voterId: voter.id,
+          voterName: voter.name,
+          votedFor: votedForA ? "A" : "B",
+          showAFirst,
+        });
+      } catch (error) {
+        votes.push({ // a failed vote is recorded with votedFor = null and counts for neither side
+          voterId: voter.id,
+          voterName: voter.name,
+          votedFor: null,
+          showAFirst,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+    }),
+  );
+
+  let winner: MatchRecord["winner"] = "TIE"; // equal counts are a tie, including 0–0 when every vote failed
+  if (votesA > votesB) winner = "A";
+  else if (votesB > votesA) winner = "B";
+
+  return {
+    match: {
+      ...baseMatch,
+      prompt,
+      answerA,
+      answerB,
+      votesA,
+      votesB,
+      winner,
+      votes,
+    },
+  };
+}
+
+export type RunQuipbenchOptions = { // Options for runQuipbench; every field is optional and falls back to a default there.
+  rounds?: number; // number of rounds to play
+  concurrency?: number; // upper bound on rounds in flight at once
+  eloK?: number; // K value passed to the Elo update
+  initialElo?: number; // rating every model starts the run with
+  seed?: number; // seed for per-round randomness; a random one is chosen when omitted
+  outputDir?: string; // directory created for the snapshot export
+  dbPath?: string; // path of the benchmark database
+  models?: BenchModel[]; // roster override; at least 3 models are required
+  ai?: QuipbenchAi; // injected AI implementation; when set, the live game module and API key are not needed
+};
+
+export async function runQuipbench( // Runs one benchmark: plays `rounds` rounds with up to `concurrency` in flight, updates Elo, persists to the DB and exports the snapshot.
+  options: RunQuipbenchOptions = {},
+): Promise<QuipbenchRunResult> {
+  const rounds = options.rounds ?? DEFAULT_ROUNDS;
+  const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
+  const eloK = options.eloK ?? DEFAULT_ELO_K;
+  const initialElo = options.initialElo ?? DEFAULT_INITIAL_ELO;
+  const models = options.models ?? QUIPBENCH_MODELS;
+  const outputDir = options.outputDir ?? DEFAULT_OUTPUT_DIR;
+  const dbPath = options.dbPath ?? DEFAULT_DB_PATH;
+  const seed = options.seed ?? Math.floor(Math.random() * 2_000_000_000); // random seed when none is supplied
+
+  if (!options.ai && !process.env.OPENROUTER_API_KEY) { // an injected `ai` needs no API key
+    throw new Error("OPENROUTER_API_KEY is required for live Quipbench runs");
+  }
+
+  if (models.length < 3) { // a round needs a prompter and two contestants
+    throw new Error("Quipbench requires at least 3 models");
+  }
+
+  if (![rounds, concurrency, eloK, initialElo].every((v) => Number.isFinite(v) && v > 0)) { // also rejects NaN and Infinity, which a plain `<= 0` check lets through
+    throw new Error("rounds, concurrency, k, and initialElo must be positive");
+  }
+
+  let ai: QuipbenchAi = options.ai ?? { // placeholder that throws; replaced below when no `ai` was injected
+    async generatePrompt() {
+      throw new Error("Live AI is not loaded");
+    },
+    async generateAnswer() {
+      throw new Error("Live AI is not loaded");
+    },
+    async vote() {
+      throw new Error("Live AI is not loaded");
+    },
+  };
+  let retry: RetryFn = defaultWithRetry;
+  let isRealStringFn = defaultIsRealString;
+
+  if (!options.ai) { // live run: use the game module's calls, retry helper and string validator
+    const live = await loadLiveAi();
+    ai = live.ai;
+    retry = live.retry;
+    isRealStringFn = live.isRealStringFn;
+  }
+
+  mkdirSync(outputDir, { recursive: true });
+
+  const db = openBenchDb(dbPath); // NOTE(review): opened before the try below, so a throw from insertRunStart would skip db.close()
+  const runId = createRunId();
+  const startedAt = new Date().toISOString();
+
+  insertRunStart(db, {
+    id: runId,
+    startedAt,
+    roundsRequested: rounds,
+    concurrency,
+    eloK,
+    initialElo,
+    seed,
+    outputDir,
+  });
+
+  const ratings = new Map<string, RatingState>(); // keyed by model id; every model starts at initialElo with 0 wins and games
+  for (const model of models) {
+    ratings.set(model.id, {
+      model,
+      elo: initialElo,
+      wins: 0,
+      games: 0,
+    });
+  }
+
+  let nextRound = 1;
+  let roundsCompleted = 0;
+  let failures = 0;
+
+  let writeLock: Promise<void> = Promise.resolve(); // tail of the write chain; each write runs after the previous one settles
+  async function serializeWrite(fn: () => void | Promise<void>) {
+    writeLock = writeLock.then(fn, fn); // `fn` runs whether the previous write resolved or rejected
+    await writeLock;
+  }
+
+  async function worker() {
+    while (true) {
+      const roundNum = nextRound; // claim the next round; no await sits between read and increment, so workers never share one
+      nextRound += 1;
+      if (roundNum > rounds) break;
+
+      const { match } = await runRound({
+        runId,
+        roundNum,
+        models,
+        ai,
+        seed,
+        retry,
+        isRealStringFn,
+      });
+
+      await serializeWrite(() => { // DB writes and rating updates are applied one round at a time, in completion order
+        insertMatch(db, match);
+
+        if (match.winner === "ERROR") {
+          failures += 1; // failed rounds are stored but do not touch ratings
+        } else {
+          roundsCompleted += 1;
+
+          const ratingA = ratings.get(match.contestantA.id);
+          const ratingB = ratings.get(match.contestantB.id);
+          if (!ratingA || !ratingB) {
+            throw new Error("Contestant rating state missing");
+          }
+
+          ratingA.games += 1;
+          ratingB.games += 1;
+
+          let scoreA = 0.5; // A's score for the Elo update: 1 for a win, 0 for a loss, 0.5 for a tie
+          if (match.winner === "A") {
+            scoreA = 1;
+            ratingA.wins += 1;
+          } else if (match.winner === "B") {
+            scoreA = 0;
+            ratingB.wins += 1;
+          }
+
+          const updated = updatePairElo(ratingA.elo, ratingB.elo, scoreA, eloK);
+          ratingA.elo = updated.nextA;
+          ratingB.elo = updated.nextB;
+        }
+
+        updateRunProgress(db, runId, { roundsCompleted, failures });
+        process.stdout.write( // "\r" rewrites the same terminal line on every update
+          `\rQuipbench progress: ${roundsCompleted + failures}/${rounds} (ok=${roundsCompleted}, failed=${failures})`,
+        );
+      });
+    }
+  }
+
+  try {
+    const workers = Array.from(
+      { length: Math.min(concurrency, rounds) }, // never start more workers than there are rounds
+      () => worker(),
+    );
+    await Promise.all(workers); // NOTE(review): rejects on the first worker failure while the other workers may still be running
+    await writeLock;
+
+    const leaderboard = buildLeaderboard(Array.from(ratings.values()));
+    replaceRatings(db, runId, leaderboard);
+
+    const endedAt = new Date().toISOString();
+    finalizeRun(db, runId, "completed", endedAt);
+
+    const snapshotPaths = await exportLatestSnapshot({
+      dbPath,
+      outputDir,
+      runId,
+    });
+
+    process.stdout.write("\n"); // end the in-place progress line
+
+    return {
+      runMeta: {
+        runId,
+        startedAt,
+        endedAt,
+        roundsRequested: rounds,
+        roundsCompleted,
+        failures,
+        concurrency,
+        eloK,
+        initialElo,
+        seed,
+      },
+      leaderboard,
+      snapshotPathJson: snapshotPaths.latestJsonPath,
+      snapshotPathJs: snapshotPaths.latestJsPath,
+    };
+  } catch (error) {
+    finalizeRun(db, runId, "failed", new Date().toISOString()); // mark the run as failed, then rethrow to the caller
+    throw error;
+  } finally {
+    db.close(); // closed on both the success and the failure path
+  }
+}
+
+async function main() { // CLI entry: parse flags, run the benchmark, print a summary and the top of the leaderboard
+  const args = parseArgs(process.argv.slice(2));
+
+  const rounds = parsePositiveInt(args.rounds, DEFAULT_ROUNDS); // second argument is presumably the fallback for missing/invalid input — see ./config
+  const concurrency = parsePositiveInt(args.concurrency, DEFAULT_CONCURRENCY);
+  const eloK = parsePositiveNumber(args.k, DEFAULT_ELO_K);
+  const initialElo = parsePositiveNumber(args.initialElo, DEFAULT_INITIAL_ELO);
+  const seedArg = args.seed ? Number.parseInt(args.seed, 10) : undefined;
+  const seed = Number.isFinite(seedArg) ? seedArg : undefined; // missing or non-numeric --seed stays undefined, so runQuipbench picks a random seed
+  const fromCwd = (p: string) => (isAbsolute(p) ? p : join(process.cwd(), p)); // absolute paths are used as given; relative ones are joined onto the cwd
+  const outputDir = args.out ? fromCwd(args.out) : DEFAULT_OUTPUT_DIR;
+  const dbPath = args.db ? fromCwd(args.db) : DEFAULT_DB_PATH;
+
+  const result = await runQuipbench({
+    rounds,
+    concurrency,
+    eloK,
+    initialElo,
+    seed,
+    outputDir,
+    dbPath,
+  });
+
+  console.log("Quipbench complete");
+  console.log(`Run ID: ${result.runMeta.runId}`);
+  console.log(
+    `Rounds: ${result.runMeta.roundsCompleted}/${result.runMeta.roundsRequested} (failures=${result.runMeta.failures})`,
+  );
+  console.log(`Snapshot JSON: ${result.snapshotPathJson}`);
+  console.log(`Snapshot JS: ${result.snapshotPathJs}`);
+
+  const preview = result.leaderboard.slice(0, 10); // print at most the top ten rows
+  for (const row of preview) {
+    console.log(
+      `${String(row.rank).padStart(2)}. ${row.modelName.padEnd(20)} Elo ${row.elo.toFixed(2).padStart(8)} | ${row.wins}/${row.games} (${row.winRate.toFixed(2)}%)`,
+    );
+  }
+}
+
+if (import.meta.main) { // run the CLI only when this file is the entry script, not when it is imported
+  main().catch((error) => {
+    console.error(error instanceof Error ? error.message : String(error));
+    process.exit(1); // any failure from main ends the process with exit code 1
+  });
+}
diff --git a/bench/types.ts b/bench/types.ts
new file mode 100644
index 0000000..1cf5333
--- /dev/null
+++ b/bench/types.ts
@@ -0,0 +1,85 @@
+export type BenchModel = { // A model taking part in the benchmark.
+  id: string; // model identifier; also the key of the per-run ratings map
+  name: string; // display name used in retry labels and on the leaderboard
+};
+
+export type VoteRecord = { // One voter's ballot in a round.
+  voterId: string;
+  voterName: string;
+  votedFor: "A" | "B" | null; // contestant the vote went to; null when the vote call failed
+  showAFirst: boolean; // true when contestant A's answer was presented in the first slot
+  error?: string; // failure message, set only for failed votes
+};
+
+export type MatchRecord = { // Full record of one round.
+  runId: string;
+  roundNum: number;
+  prompter: BenchModel;
+  contestantA: BenchModel;
+  contestantB: BenchModel;
+  prompt?: string; // absent when prompt generation failed
+  answerA?: string; // answers are absent when the round failed before voting
+  answerB?: string;
+  votesA: number; // successful votes for contestant A
+  votesB: number; // successful votes for contestant B
+  winner: "A" | "B" | "TIE" | "ERROR"; // "ERROR" marks a round whose prompt or answers could not be generated
+  votes: VoteRecord[];
+  error?: string; // failure description for "ERROR" rounds
+};
+
+export type RatingState = { // Running rating of one model during a run.
+  model: BenchModel;
+  elo: number; // current, unrounded Elo rating
+  wins: number; // rounds won as a contestant
+  games: number; // non-error rounds played as a contestant, ties included
+};
+
+export type LeaderboardRow = { // One ranked leaderboard entry, as produced by buildLeaderboard.
+  rank: number; // 1-based position after sorting
+  modelId: string;
+  modelName: string;
+  elo: number; // rounded to two decimals
+  wins: number;
+  games: number;
+  winRate: number; // percentage rounded to two decimals; 0 when no games were played
+};
+
+export type RunMeta = { // Summary of one benchmark run.
+  runId: string;
+  startedAt: string; // ISO-8601 timestamp
+  endedAt: string; // ISO-8601 timestamp
+  roundsRequested: number;
+  roundsCompleted: number; // rounds that ended with a winner or a tie
+  failures: number; // rounds recorded with winner "ERROR"
+  concurrency: number;
+  eloK: number;
+  initialElo: number;
+  seed: number; // seed the run's per-round randomness was derived from
+};
+
+export type QuipbenchSnapshot = { // Shape of the exported `latest` snapshot.
+  runMeta: RunMeta;
+  leaderboard: LeaderboardRow[];
+  chart: Array<{ // one { modelName, elo } point per model for the dashboard's Elo chart
+    modelName: string;
+    elo: number;
+  }>;
+};
+
+export type QuipbenchRunResult = { // What runQuipbench resolves with.
+  runMeta: RunMeta;
+  leaderboard: LeaderboardRow[];
+  snapshotPathJson: string; // path of the exported JSON snapshot
+  snapshotPathJs: string; // path of the exported JS snapshot
+};
+
+export type QuipbenchAi = { // The three model calls a round needs; a stub can be injected through RunQuipbenchOptions.ai.
+  generatePrompt: (model: BenchModel) => Promise<string>; // asks `model` for a round prompt
+  generateAnswer: (model: BenchModel, prompt: string) => Promise<string>; // asks `model` to answer `prompt`
+  vote: ( // resolves to the chosen presentation slot: "A" = the `answerA` argument, "B" = `answerB`
+    voter: BenchModel,
+    prompt: string,
+    answerA: string,
+    answerB: string,
+  ) => Promise<"A" | "B">;
+};
diff --git a/bun.lock b/bun.lock
index ae913d8..d6f2a45 100644
--- a/bun.lock
+++ b/bun.lock
@@ -1,6 +1,5 @@
 {
   "lockfileVersion": 1,
-  "configVersion": 1,
   "workspaces": {
     "": {
       "name": "quipslop",
@@ -11,6 +10,7 @@
         "puppeteer": "^24.2.0",
         "react": "^19.2.4",
         "react-dom": "^19.2.4",
+        "zod": "^4.3.6",
       },
       "devDependencies": {
         "@types/bun": "latest",
diff --git a/package.json b/package.json
index 50f949b..208af5c 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,10 @@
     "start:cli": "bun quipslop.tsx",
     "start:web": "bun --hot server.ts",
     "start:stream": "bun ./scripts/stream-browser.ts live",
-    "start:stream:dryrun": "bun ./scripts/stream-browser.ts dryrun"
+    "start:stream:dryrun": "bun ./scripts/stream-browser.ts dryrun",
+    "quipbench:run": "bun bench/run.ts",
+    "quipbench:export": "bun bench/export.ts",
+    "quipbench:open": "bun bench/open.ts"
   },
   "devDependencies": {
     "@types/bun": "latest",
@@ -24,6 +27,7 @@
     "ink": "^6.8.0",
     "puppeteer": "^24.2.0",
     "react": "^19.2.4",
-    "react-dom": "^19.2.4"
+    "react-dom": "^19.2.4",
+    "zod": "^4.3.6"
   }
 }