chorus-codes · chorus-codes · May 28, 2026 · May 28, 2026
diff --git a/src/app/api/run-artifacts/[chatId]/route.ts b/src/app/api/run-artifacts/[chatId]/route.ts
@@ -43,6 +43,13 @@ interface SwapEntry {
   ts: number;
   fromErrorKind?: string;
   fromErrorMessage?: string;
+  /** True iff the TO voice's run wrote `_stats.json` for this slot
+   *  (lineage + model stamp matches the slot's final _stats.json).
+   *  False when the chain advanced past this entry via collision or
+   *  exhaustion. UI uses this to gate the fallback banner so a slot
+   *  whose primary produced the displayed answer doesn't render a
+   *  misleading "fallback fired / actually ran" badge. */
+  actuallyRan?: boolean;
 }
 
 /**
@@ -138,6 +145,31 @@ function readChatSwaps(chatId: string): SwapEntry[] {
           // the swap's fromModel. Lets the UI render "kimi-k2.6 failed:
           // cli_failed — model not found" instead of a bare arrow.
           const attemptsByModel = readAttemptsByModel(partDir);
+          // Read the slot's `_stats.json` to find which lineage/model
+          // actually produced the final answer.md. reviewer.ts stamps
+          // `lineage` + `model` on every successful completion; the
+          // LAST writer wins. We use this to mark `actuallyRan: true`
+          // on only the swap entry whose `to` matches the final
+          // writer. Collision-exhausted chains (gemini → sonnet where
+          // sonnet was claimed elsewhere) leave _stats.json carrying
+          // the primary's stamp, so no swap entry gets actuallyRan —
+          // and the UI suppresses the misleading banners.
+          let finalLineage: string | undefined;
+          let finalModel: string | undefined;
+          try {
+            const statsPath = path.join(partDir, "_stats.json");
+            if (fs.existsSync(statsPath)) {
+              const stats = JSON.parse(fs.readFileSync(statsPath, "utf-8"));
+              if (typeof stats?.lineage === "string") {
+                finalLineage = stats.lineage;
+              }
+              if (typeof stats?.model === "string") {
+                finalModel = stats.model;
+              }
+            }
+          } catch {
+            /* malformed stats — leave finalLineage undefined */
+          }
           for (const entry of parsed) {
             const valid = isValidSwapEntry(entry);
             if (!valid) continue;
@@ -146,6 +178,10 @@ function readChatSwaps(chatId: string): SwapEntry[] {
               valid.fromErrorKind = att.errorKind;
               valid.fromErrorMessage = att.errorMessage;
             }
+            const toMatchesFinal =
+              finalLineage === valid.toLineage &&
+              (finalModel ?? null) === (valid.toModel ?? null);
+            valid.actuallyRan = toMatchesFinal;
             out.push(valid);
           }
         }

diff --git a/src/app/runs/[runId]/page.tsx b/src/app/runs/[runId]/page.tsx
@@ -44,23 +44,23 @@ async function getRunData(runId: string) {
   return { chat, template };
 }
 
-const AGENT_TO_LINEAGE: Record<string, "claude" | "codex" | "gemini" | "opencode" | "kimi" | "openrouter"> = {
-  "claude-code": "claude",
-  "codex-cli": "codex",
-  "gemini-cli": "gemini",
-  "opencode-cli": "opencode",
-  "kimi-cli": "kimi",
-  // HTTP-dispatched shim — runner creates `reviewer-openrouter-N` dirs;
-  // without this entry the lineage fell through to "claude" and rendered
-  // OpenRouter answers with the wrong brand on the run page.
-  openrouter: "openrouter",
-};
+// Single-source-of-truth map shared with the cockpit API route and the
+// run-viewer types. The previous inline copy here was missing
+// `antigravity-cli` and `grok-cli` — those participants fell through to
+// the `?? "claude"` default at the lookup site and the SSR snapshot
+// rendered phantom CLAUDE cards alongside the synthesized ANTIGRAVITY
+// placeholder. The /api/run-artifacts route reads AGENT_TO_UI_LINEAGE
+// (which has the full map), so a client-side poll later reclassified the
+// participant and the phantom vanished — "appears on refresh, goes away
+// after seconds" was exactly that drift.
+import { AGENT_TO_UI_LINEAGE as AGENT_TO_LINEAGE } from "@/lib/agent-name-map";
+import type { ReviewerLineage } from "@/lib/types";
 
 interface ParticipantSnapshot {
   participant: string;
   role: "doer" | "reviewer";
   agentName: string;
-  lineage: "claude" | "codex" | "gemini" | "opencode" | "kimi" | "openrouter";
+  lineage: ReviewerLineage;
   hasAnswer: boolean;
   answer?: string;
   findingsPreview?: string[];
@@ -100,7 +100,14 @@ function readChatRounds(chatId: string): RoundSnapshot[] {
       .map((d) => {
         const role: "doer" | "reviewer" = d.name.startsWith("doer-") ? "doer" : "reviewer";
         const rawAgent = d.name.replace(/^(doer-|reviewer-)/, "").replace(/-\d+$/, "");
-        const lineage = AGENT_TO_LINEAGE[rawAgent] ?? "claude";
+        // Mirror /api/run-artifacts route: pass the raw agent name
+        // through when unknown so the run-page banner shows the actual
+        // CLI name. The hardcoded "claude" fallback misclassified every
+        // future CLI as a phantom claude card. The cast widens the lookup
+        // result back to the ReviewerLineage union — `displayLineage` and
+        // the lineage-maps tolerate unknown strings by falling through to
+        // the lowercased label.
+        const lineage = (AGENT_TO_LINEAGE[rawAgent] ?? rawAgent) as ReviewerLineage;
         const answerPath = path.join(roundDir, d.name, "answer.md");
         // hasAnswer must mirror the API route: gated on the `## DONE`
         // sentinel so a mid-stream doer doesn't render as "done · no

diff --git a/src/components/run-viewer/participant-card.tsx b/src/components/run-viewer/participant-card.tsx
@@ -43,7 +43,7 @@
 */
 export function ParticipantCard({
  participant,
  isActive,
  liveTail,
  chatTerminal,
  chatId,
@@ -203,16 +203,28 @@
       </div>
 
       {swaps && swaps.length > 0 && (() => {
-        // Only the LAST entry's `to` voice actually produced an answer;
-        // intermediate `to` voices were attempted and themselves failed
-        // (which is what triggered the next swap). Showing "actually ran"
-        // on every row is wrong for chains of length > 1.
         const sorted = swaps.slice().sort((a, b) => a.fallbackIdx - b.fallbackIdx);
+        // Suppress the banner block entirely when the primary produced
+        // the displayed answer AND no swap target actually ran. This
+        // is the gemini bowerbird case (verdict_ambiguous false-
+        // positive → fallback fired → collision → chain exhausted)
+        // where the slot card was rendering DONE + content + three
+        // contradictory amber banners. The primary's content stands
+        // on its own; the failed-fallback breadcrumbs are noise.
+        const someSwapActuallyRan = sorted.some((s) => s.actuallyRan);
+        if (participant.hasAnswer && !someSwapActuallyRan) return null;
         return (
           <div className="space-y-1.5 border-b border-amber-500/30 bg-amber-500/5 px-4 py-2 text-[11px]">
             {sorted.map((s, i) => {
               const isCross = s.reason === "lineage_fallback";
-              const isLast = i === sorted.length - 1;
+              // `actuallyRan` is the truth-bearing signal — derived
+              // server-side from the slot's `_stats.json` lineage+
+              // model stamp matching this entry's `to` voice. The
+              // pre-fix `isLast` heuristic was wrong for collision-
+              // exhausted chains (where the last entry's TO voice
+              // never ran). Strikethrough + dim styling applies to
+              // any entry where the TO didn't run.
+              const ran = s.actuallyRan === true;
               return (
                 <div
                   key={`${s.fromLineage}-${s.fromModel}-${i}`}
@@ -230,14 +242,14 @@
                       <ArrowRight className="h-3 w-3 shrink-0 text-amber-300" />
                       <span
                         className={
-                          isLast
+                          ran
                             ? "font-medium text-amber-100"
                             : "text-amber-100/60 line-through"
                         }
                       >
                         {s.toLineage}/{s.toModel}
                       </span>
-                      {isLast && (
+                      {ran && (
                         <span className="rounded bg-amber-500/15 px-1 py-0.5 font-mono text-[9px] text-amber-200">
                           actually ran
                         </span>

diff --git a/src/components/run-viewer/types.ts b/src/components/run-viewer/types.ts
@@ -111,6 +111,17 @@ export interface FallbackSwap {
   fromErrorKind?: string;
   /** One-line message from the failing attempt; trimmed to ~200 chars. */
   fromErrorMessage?: string;
+  /** True iff the TO voice was the LAST writer of `_stats.json` for
+   *  this slot (the runner stamps lineage+model on every successful
+   *  `message_done`). Not "did it run at all" — a fallback that
+   *  ran-and-failed before message_done won't stamp stats and will
+   *  report false here. For the UI gate (suppress banner when primary
+   *  has answer + no swap actuallyRan), this is the right semantic:
+   *  the banner mismatch only matters when the displayed answer is the
+   *  primary's, which means no successful fallback writer overrode it.
+   *  Previously the UI inferred "actually ran" from `isLast` in the
+   *  swaps array — wrong for collision/exhausted chains. */
+  actuallyRan?: boolean;
 }
 
 export interface RoundSnapshot {

diff --git a/src/daemon/runner/reviewer-driver.ts b/src/daemon/runner/reviewer-driver.ts
@@ -621,6 +621,39 @@ async function runReviewer(
             return null;
           } catch (err) {
             threw = true;
+            // Thrown exceptions bypass reviewer.ts's _attempts.jsonl
+            // writer (the finally there only writes on errored=true,
+            // and a throw from spawn/timeout/abort never sets that
+            // flag). Without this row, the operator sees "fallback
+            // fired" with no record of why. Append a structurally-
+            // identical diagnostic before re-throwing so post-mortem
+            // grep across _attempts.jsonl is uniform across crash
+            // mode (errored, silent, thrown).
+            const message =
+              err instanceof Error ? err.message : String(err);
+            try {
+              const attemptsFile = path.join(reviewerDir, '_attempts.jsonl');
+              fs.appendFileSync(
+                attemptsFile,
+                JSON.stringify({
+                  ts: Date.now(),
+                  round,
+                  lineage: entry.lineage,
+                  model: entry.model ?? null,
+                  errorKind: 'runtime_error',
+                  errorMessage: message,
+                  durationMs: 0,
+                }) + '\n',
+              );
+            } catch {
+              /* best-effort */
+            }
+            console.warn(
+              `[reviewer] attempt threw chat=${chatId} round=${round} ` +
+                `slot=${agentName}-${reviewerIdx} ` +
+                `lineage=${entry.lineage} model=${entry.model ?? '(default)'} ` +
+                `kind=runtime_error message=${JSON.stringify(message).slice(0, 300)}`,
+            );
             throw err;
           } finally {
             if (threw) {

diff --git a/src/daemon/runner/reviewer.ts b/src/daemon/runner/reviewer.ts
@@ -223,6 +223,15 @@ export async function runReviewerHeadless(args: {
             path.join(reviewerDir, '_stats.json'),
             JSON.stringify({
               durationMs: Date.now() - startedAt,
+              // Lineage + model stamp lets the run-artifacts route
+              // tell whether the LAST voice that succeeded was the
+              // primary or a fallback. Without this stamp the cockpit
+              // can't distinguish "fallback actually ran" from
+              // "fallback collided, primary's answer is what's shown"
+              // — and renders contradictory banners on collision-
+              // exhausted slots like the bowerbird gemini case.
+              lineage: candidateLineage,
+              ...(candidateModel ? { model: candidateModel } : {}),
               ...(usageForStats ? { usage: usageForStats } : {}),
             }),
             'utf-8',
@@ -434,6 +443,13 @@ export async function runReviewerHeadless(args: {
     // lands regardless of how the run exits (success, error, abort).
     // Append-only JSONL keyed by (round, model) so multi-step fallback
     // chains leave a trail.
+    //
+    // Errored path only — silent failures (empty content, no error event,
+    // verdict_ambiguous) are written below at the null-return sites so
+    // zero rows in _attempts.jsonl genuinely means "no failure to log",
+    // not "diagnostic gap". Bowerbird chat 019E6E3318873C26DCA60409B84F90E9
+    // had a kimi slot that fell through to fallback with NO _attempts row
+    // and NO daemon log line — that gap is what this refactor closes.
     if (errored) {
       const errorKind = errorSummary?.kind ?? 'unknown';
       const errorMessage = errorSummary?.message ?? '(no message captured)';
@@ -445,33 +461,16 @@ export async function runReviewerHeadless(args: {
         lastError.kind = errorKind;
         lastError.message = errorMessage;
       }
-      const durationMs = Date.now() - startedAt;
-      try {
-        const attemptsFile = path.join(reviewerDir, '_attempts.jsonl');
-        const entry = {
-          ts: Date.now(),
-          round,
-          lineage: candidateLineage,
-          model: candidateModel ?? null,
-          errorKind,
-          errorMessage,
-          durationMs,
-        };
-        fs.appendFileSync(attemptsFile, JSON.stringify(entry) + '\n');
-      } catch {
-        /* best-effort — diagnostics shouldn't fail the run */
-      }
-      // Daemon-log line — same content as the JSONL row but at the
-      // daemon level so a single tail of ~/.chorus/logs/daemon.log
-      // shows every failed reviewer attempt across every chat without
-      // walking per-chat dirs. Grep-friendly key=value format mirrors
-      // the openrouter shim's own warn lines.
-      console.warn(
-        `[reviewer] attempt failed chat=${chatId} round=${round} ` +
-          `lineage=${candidateLineage} model=${candidateModel ?? '(default)'} ` +
-          `kind=${errorKind} duration_ms=${durationMs} ` +
-          `message=${JSON.stringify(errorMessage).slice(0, 300)}`,
-      );
+      writeAttemptRow({
+        reviewerDir,
+        chatId,
+        round,
+        lineage: candidateLineage,
+        model: candidateModel,
+        kind: errorKind,
+        message: errorMessage,
+        durationMs: Date.now() - startedAt,
+      });
     }
     // Mirror runDoerHeadless: surface answer.md write failures as a
     // cli_warning so the user sees "stream stopped writing" instead of
@@ -513,7 +512,31 @@ export async function runReviewerHeadless(args: {
   const streamed = finalText && finalText.length > 0 ? finalText : accumulated;
   const content = onDisk.trim().length > 0 ? onDisk : streamed;
   if (errored && content.trim().length === 0) return null;
-  if (content.trim().length === 0) return null;
+  if (content.trim().length === 0) {
+    // Silent-failure path: stream ended without an error event AND
+    // produced no usable content. Previously this returned null with no
+    // _attempts.jsonl row and no daemon log line, leaving the operator
+    // with no record of why a fallback fired. Synthesise a kind +
+    // diagnostic row so every null return is traceable.
+    if (lastError && !lastError.kind) {
+      lastError.kind = 'empty_no_error';
+      lastError.message =
+        'CLI exited without writing usable content to answer.md and without emitting an error event.';
+    }
+    writeAttemptRow({
+      reviewerDir,
+      chatId,
+      round,
+      lineage: candidateLineage,
+      model: candidateModel,
+      kind: lastError?.kind ?? 'empty_no_error',
+      message:
+        lastError?.message ??
+        'CLI exited without writing usable content to answer.md and without emitting an error event.',
+      durationMs: Date.now() - startedAt,
+    });
+    return null;
+  }
 
   // Record healthy on success so a stale quota_exhausted / auth_invalid
   // record from a prior session clears. Without this, the Reviewer
@@ -557,6 +580,58 @@ export async function runReviewerHeadless(args: {
     lastError.kind = 'verdict_ambiguous';
     lastError.message =
       'Reviewer wrote non-empty content but no approve/reject verdict was detected.';
+    writeAttemptRow({
+      reviewerDir,
+      chatId,
+      round,
+      lineage: candidateLineage,
+      model: candidateModel,
+      kind: 'verdict_ambiguous',
+      message: lastError.message,
+      durationMs: Date.now() - startedAt,
+    });
   }
   return verdict;
 }
+
+/**
+ * Append a diagnostic row to `_attempts.jsonl` and emit the matching
+ * `[reviewer] attempt failed …` daemon log line. Called from every path
+ * that returns a non-positive result (errored, empty-content, verdict-
+ * ambiguous) so post-mortem grep across reviewer-slot _attempts.jsonl
+ * sidecars shows every reason a slot fell back. Best-effort writes —
+ * diagnostic failure must never fail the run.
+ */
+function writeAttemptRow(args: {
+  reviewerDir: string;
+  chatId: string;
+  round: number;
+  lineage: string;
+  model: string | undefined;
+  kind: string;
+  message: string;
+  durationMs: number;
+}): void {
+  const { reviewerDir, chatId, round, lineage, model, kind, message, durationMs } = args;
+  try {
+    const attemptsFile = path.join(reviewerDir, '_attempts.jsonl');
+    const entry = {
+      ts: Date.now(),
+      round,
+      lineage,
+      model: model ?? null,
+      errorKind: kind,
+      errorMessage: message,
+      durationMs,
+    };
+    fs.appendFileSync(attemptsFile, JSON.stringify(entry) + '\n');
+  } catch {
+    /* best-effort — diagnostics shouldn't fail the run */
+  }
+  console.warn(
+    `[reviewer] attempt failed chat=${chatId} round=${round} ` +
+      `lineage=${lineage} model=${model ?? '(default)'} ` +
+      `kind=${kind} duration_ms=${durationMs} ` +
+      `message=${JSON.stringify(message).slice(0, 300)}`,
+  );
+}