chrishayuk · chrishayuk · Jun 17, 2026 · Jun 11, 2026 · Jun 12, 2026 · Jun 13, 2026
diff --git a/ROADMAP.md b/ROADMAP.md
diff --git a/ROADMAP_STATUS.md b/ROADMAP_STATUS.md
diff --git a/bench/aim-validation/ave_demo_gemma3-4b.json b/bench/aim-validation/ave_demo_gemma3-4b.json
@@ -0,0 +1 @@
+{"experiment":"ave_demo","vindex":"output/gemma3-4b-q4k-v2.vindex","explicit":[6,6],"schedule_end":[6,6],"native":[2,6],"false_fires":[0,11],"items":[{"leg":"explicit","prompt":"12 + 7 =","expected":"19","dispatch_ok":true,"native_ok":true,"native_text":"19\n12 - 7 =","native_tokens":11,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"12 + 7","alu_result":"19","emitted":" 19","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":3}},{"leg":"explicit","prompt":"123456 + 654321 =","expected":"777777","dispatch_ok":true,"native_ok":false,"native_text":"?\n\n123456 + 65432","native_tokens":15,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"123456 + 654321","alu_result":"777777","emitted":" 777777","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":7}},{"leg":"explicit","prompt":"100000 - 1 =","expected":"99999","dispatch_ok":true,"native_ok":true,"native_text":"99999\n100000 -","native_tokens":14,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"100000 - 1","alu_result":"99999","emitted":" 99999","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":6}},{"leg":"explicit","prompt":"12345 * 6789 =","expected":"83810205","dispatch_ok":true,"native_ok":false,"native_text":"8380000 - 1000 = 8","native_tokens":17,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"12345 * 6789","alu_result":"83810205","emitted":" 83810205","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":9}},{"leg":"explicit","prompt":"999 + 111 - 222 =","expected":"888","dispatch_ok":true,"native_ok":false,"native_text":"?\n\n999 + 111 =","native_tokens":12,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"999 + 111 - 222","alu_result":"888","emitted":" 888","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":4}},{"leg":"explicit","prompt":"858358354868358358358358 + 141641645131641641641641 =","expected":"999999999999999999999999","dispatch_ok":true,"native_ok":false,"native_text":"?\n\n**Calculation:**\n\n858358354868358358358358 + 1","native_tokens":33,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"858358354868358358358358 + 141641645131641641641641","alu_result":"999999999999999999999999","emitted":" 999999999999999999999999","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":25}},{"leg":"distractor","prompt":"My phone number is 4415550172.","fire":"no"},{"leg":"distractor","prompt":"The meeting is on 2026-06-11.","fire":"no"},{"leg":"distractor","prompt":"Train 9 departs at 18:45 from platform 3.","fire":"no"},{"leg":"distractor","prompt":"Order 66 was executed in 19 BBY.","fire":"no"},{"leg":"distractor","prompt":"Account 123456789012345678901234567890 is active.","fire":"no"},{"leg":"distractor","prompt":"What is the capital of France?","fire":"no"},{"leg":"distractor","prompt":"It takes 5 - 10 business days.","fire":"no"},{"leg":"distractor","prompt":"I work a 9 - 5 job.","fire":"no"},{"leg":"distractor","prompt":"a 4 x 4 truck parked outside","fire":"no"},{"leg":"distractor","prompt":"Are you available 9 - 5?","fire":"no"},{"leg":"distractor","prompt":"dated 2026 - 06 - 11 in the ledger","fire":"no"}]}
diff --git a/bench/aim-validation/ave_stream_trigger_gemma3-4b.json b/bench/aim-validation/ave_stream_trigger_gemma3-4b.json
diff --git a/bench/aim-validation/fr3_explicit_rewrite_gemma3-4b.json b/bench/aim-validation/fr3_explicit_rewrite_gemma3-4b.json
@@ -0,0 +1 @@
+{"experiment":"fr3_explicit_rewrite","vindex":"output/gemma3-4b-q4k-v2.vindex","synonym_top1":[6,6],"phrasing_top1":[6,6],"distractor_fires":[0,3],"cases":[{"w":"seat","bucket":"synonym","expected":"capital","top1":"capital","rank":1},{"w":"metropolis","bucket":"synonym","expected":"capital","top1":"capital","rank":1},{"w":"money","bucket":"synonym","expected":"currency","top1":"currency","rank":1},{"w":"cash","bucket":"synonym","expected":"currency","top1":"currency","rank":1},{"w":"tongue","bucket":"synonym","expected":"language","top1":"language","rank":1},{"w":"speech","bucket":"synonym","expected":"language","top1":"language","rank":1},{"w":"head city","bucket":"phrasing","expected":"capital","top1":"capital","rank":1},{"w":"main city","bucket":"phrasing","expected":"capital","top1":"capital","rank":1},{"w":"legal tender","bucket":"phrasing","expected":"currency","top1":"currency","rank":1},{"w":"unit of money","bucket":"phrasing","expected":"currency","top1":"currency","rank":1},{"w":"spoken language","bucket":"phrasing","expected":"language","top1":"language","rank":1},{"w":"mother tongue","bucket":"phrasing","expected":"language","top1":"language","rank":1},{"w":"banana","bucket":"distractor","expected":"","top1":"none","rank":-1},{"w":"weather","bucket":"distractor","expected":"","top1":"none","rank":-1},{"w":"altitude","bucket":"distractor","expected":"","top1":"none","rank":-1}]}
diff --git a/bench/aim-validation/fr3_template_ablation_gemma3-4b.json b/bench/aim-validation/fr3_template_ablation_gemma3-4b.json
@@ -0,0 +1 @@
+{"experiment":"fr3_template_ablation","vindex":"output/gemma3-4b-q4k-v2.vindex","n_entities":6,"held_out_template":"The {r} for {e} would be","layers":[{"layer":6,"acc_k1":0.3889,"acc_k2":0.3333,"acc_k4":0.8333},{"layer":10,"acc_k1":0.3333,"acc_k2":0.3889,"acc_k4":0.3889},{"layer":14,"acc_k1":0.3333,"acc_k2":0.3333,"acc_k4":0.3333},{"layer":20,"acc_k1":0.1667,"acc_k2":0.1111,"acc_k4":0.1667}]}
diff --git a/bench/aim-validation/fr_early_exit_decode_projection_gemma3-4b.json b/bench/aim-validation/fr_early_exit_decode_projection_gemma3-4b.json
@@ -0,0 +1 @@
+{"experiment":"fr_early_exit_decode_projection","vindex":"output/gemma3-4b-q4k-v2.vindex","install_layer":24,"num_layers":34,"fired":12,"full_ms":466.4905,"early_ms":344.9526,"per_token_speedup":1.3523}
diff --git a/bench/baselines/_c10_26b_larql_inproc.json b/bench/baselines/_c10_26b_larql_inproc.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781043889",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 4446.251667,
+      "ms_per_tok": {
+        "mean": 159.1440967795276,
+        "p50": 153.134333,
+        "p99": 267.903
+      },
+      "tok_per_s": 6.283613531611942,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_c10_26b_larql_inproc_off.json b/bench/baselines/_c10_26b_larql_inproc_off.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781130875",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 6587.231458,
+      "ms_per_tok": {
+        "mean": 140.4677713307087,
+        "p50": 134.121208,
+        "p99": 216.553542
+      },
+      "tok_per_s": 7.119070734351308,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_c10_26b_larql_inproc_q4kattn.json b/bench/baselines/_c10_26b_larql_inproc_q4kattn.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781130897",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 651.566959,
+      "ms_per_tok": {
+        "mean": 103.53604429133861,
+        "p50": 101.69933400000001,
+        "p99": 157.133625
+      },
+      "tok_per_s": 9.658472147014947,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_c10_26b_larql_inproc_rerun.json b/bench/baselines/_c10_26b_larql_inproc_rerun.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781045118",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 2574.087584,
+      "ms_per_tok": {
+        "mean": 460.48107119685034,
+        "p50": 434.098458,
+        "p99": 712.708333
+      },
+      "tok_per_s": 2.1716419252606185,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_c10_26b_larql_inproc_tmax.json b/bench/baselines/_c10_26b_larql_inproc_tmax.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781044778",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 1788.925916,
+      "ms_per_tok": {
+        "mean": 307.8270738188976,
+        "p50": 292.93525,
+        "p99": 477.71054200000003
+      },
+      "tok_per_s": 3.2485771559792207,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full.json b/bench/baselines/_qres_full.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781204761",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 651.106875,
+      "ms_per_tok": {
+        "mean": 71.86225622834645,
+        "p50": 69.125958,
+        "p99": 126.85254100000002
+      },
+      "tok_per_s": 13.915510763013653,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_asm.json b/bench/baselines/_qres_full_asm.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781204777",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 878.4857499999999,
+      "ms_per_tok": {
+        "mean": 63.034867456692886,
+        "p50": 62.892792,
+        "p99": 70.413875
+      },
+      "tok_per_s": 15.864235784854062,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_int8attn.json b/bench/baselines/_qres_full_int8attn.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781215508",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 623.639625,
+      "ms_per_tok": {
+        "mean": 53.79693537007876,
+        "p50": 51.4435,
+        "p99": 85.4415
+      },
+      "tok_per_s": 18.588419454023185,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_kvappend.json b/bench/baselines/_qres_full_kvappend.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781303433",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 648.1185409999999,
+      "ms_per_tok": {
+        "mean": 35.78364665354332,
+        "p50": 35.689167,
+        "p99": 38.137292
+      },
+      "tok_per_s": 27.945726428666802,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_q6kasm.json b/bench/baselines/_qres_full_q6kasm.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781216286",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 601.012541,
+      "ms_per_tok": {
+        "mean": 46.30266011023623,
+        "p50": 46.490790999999994,
+        "p99": 51.000875
+      },
+      "tok_per_s": 21.597031307039913,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_serialcuts.json b/bench/baselines/_qres_full_serialcuts.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781222172",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 704.3161249999999,
+      "ms_per_tok": {
+        "mean": 42.604734905511805,
+        "p50": 42.479625,
+        "p99": 48.934290999999995
+      },
+      "tok_per_s": 23.47156958534741,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_full_v3.json b/bench/baselines/_qres_full_v3.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781218079",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 661.730292,
+      "ms_per_tok": {
+        "mean": 46.920711937007894,
+        "p50": 46.163792,
+        "p99": 66.024875
+      },
+      "tok_per_s": 21.31254959094658,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/_qres_off.json b/bench/baselines/_qres_off.json
@@ -0,0 +1,23 @@
+{
+  "timestamp": "1781204743",
+  "model": "output/gemma4-26b-a4b-q4k.vindex",
+  "prompt": "Write a long detailed essay about the history of the Roman empire, covering its founding, rise, and fall:",
+  "tokens": 128,
+  "wire": null,
+  "concurrent": 1,
+  "results": [
+    {
+      "backend": "larql-cpu-moe (standard)",
+      "prefill_ms": 4660.109875,
+      "ms_per_tok": {
+        "mean": 131.48366732283458,
+        "p50": 126.305417,
+        "p99": 192.647666
+      },
+      "tok_per_s": 7.605507363471078,
+      "wire_bytes_per_tok": null,
+      "n_steps": 127,
+      "note": "in-process experts, KV-cached"
+    }
+  ]
+}
diff --git a/bench/baselines/c10_gemma3-4b_cpu_reconciled.json b/bench/baselines/c10_gemma3-4b_cpu_reconciled.json
@@ -42,5 +42,5 @@
     "ollama num_gpu=0 needs a warmup call after any GPU-mode use (mode switch forces a model reload); short unwarmed measurements are unreliable for a CPU baseline."
   ],
   "mechanism_confirms_C12": "The reconciled ~1.6-1.8x gap is the same kernel-quality gap the C12 diagnosis identified: 1.73x per-core, larql's NEON intrinsics (LLVM-lowered) vs llama.cpp's hand-asm Q4K x Q8K SDOT (two-super-block interleave + prefetch). Both attention and FFN already run the int8 Q8_K SDOT kernel. The lever remains C12 (hand-asm kernel), unchanged.",
-  "still_owed": "26B-A4B llama.cpp CPU baseline (needs a 26B GGUF, not local) -- that is the number that actually pins the medium-term tier."
+  "still_owed": "CLEARED 2026-06-10 -- see c10_gemma4-26b-a4b_cpu_reconciled.json (llama.cpp 32.1 vs larql 7.1 default / 9.7 with LARQL_Q4K_DIRECT_ATTN; the 26B gap is f32-residency byte traffic, not the C12 kernel)."
 }
diff --git a/bench/baselines/c10_gemma4-26b-a4b_cpu_RUNBOOK.md b/bench/baselines/c10_gemma4-26b-a4b_cpu_RUNBOOK.md
@@ -1,5 +1,24 @@
 # Runbook — Gemma 4 26B-A4B CPU baseline (the medium-term-tier pin)
 
+> **✅ COMPLETE 2026-06-10** — results in
+> `c10_gemma4-26b-a4b_cpu_reconciled.json`: llama.cpp 32.1 tok/s vs larql
+> in-process 7.1 (default) / 9.7 (`LARQL_Q4K_DIRECT_ATTN=1`) / loopback 7.3
+> (t=8, warm, n=128, drift-bracketed). Gap = f32-residency byte traffic
+> (~10 GB/tok vs ~2.1), not the kernel. Tier 62%→70%.
+>
+> **Corrections to this runbook learned in the run:**
+> - §2: serve with `--experts 0-127`, NOT `--ffn-only` (no expert endpoints →
+>   "bad expert response"). And `larql bench --moe-shards` still uses the
+>   pre-C1 path (fails on CPU, #146 signature) — use
+>   `larql run --moe-shards --engine standard` with `RAYON_NUM_THREADS=8`.
+> - §Method addition (mandatory): `pmset -g batt` must show AC/full charge,
+>   and bracket the matrix with a llama-bench drift check. The first session
+>   was invalidated by a silent battery drain (llama.cpp itself fell 34→1.05
+>   tok/s at 31% battery; ~30×, far beyond the thermal class) plus Spotlight
+>   churn after 30+ GB of model I/O.
+> - The 1.8-vs-4.4 question dissolves: both were artifacts (cold n=8 smoke vs
+>   cross-session conditions). Warm AC: in-process 7.1 ≈ loopback 7.3.
+
 Goal: produce `c10_gemma4-26b-a4b_cpu_reconciled.json` — the missing
 26B-A4B CPU decode number that pins the **medium-term achievability tier**
 (currently 62%, gate rule in `ROADMAP.md`: *"if 10 tok/s ≈ llama.cpp-on-26B-CPU
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"experiment":"ave_demo","vindex":"output/gemma3-4b-q4k-v2.vindex","explicit":[6,6],"schedule_end":[6,6],"native":[2,6],"false_fires":[0,11],"items":[{"leg":"explicit","prompt":"12 + 7 =","expected":"19","dispatch_ok":true,"native_ok":true,"native_text":"19\n12 - 7 =","native_tokens":11,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"12 + 7","alu_result":"19","emitted":" 19","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":3}},{"leg":"explicit","prompt":"123456 + 654321 =","expected":"777777","dispatch_ok":true,"native_ok":false,"native_text":"?\n\n123456 + 65432","native_tokens":15,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"123456 + 654321","alu_result":"777777","emitted":" 777777","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":7}},{"leg":"explicit","prompt":"100000 - 1 =","expected":"99999","dispatch_ok":true,"native_ok":true,"native_text":"99999\n100000 -","native_tokens":14,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"100000 - 1","alu_result":"99999","emitted":" 99999","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":6}},{"leg":"explicit","prompt":"12345 * 6789 =","expected":"83810205","dispatch_ok":true,"native_ok":false,"native_text":"8380000 - 1000 = 8","native_tokens":17,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"12345 * 6789","alu_result":"83810205","emitted":" 83810205","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":9}},{"leg":"explicit","prompt":"999 + 111 - 222 =","expected":"888","dispatch_ok":true,"native_ok":false,"native_text":"?\n\n999 + 111 =","native_tokens":12,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"999 + 111 - 222","alu_result":"888","emitted":" 888","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":4}},{"leg":"explicit","prompt":"858358354868358358358358 + 141641645131641641641641 =","expected":"999999999999999999999999","dispatch_ok":true,"native_ok":false,"native_text":"?\n\nCalculation:\n\n858358354868358358358358 + 1","native_tokens":33,"telemetry":{"fire":"tier0","path":"forced_explicit","expression":"858358354868358358358358 + 141641645131641641641641","alu_result":"999999999999999999999999","emitted":" 999999999999999999999999","termination":"schedule_end","verify":"skipped","flags":[],"rewrite_tokens":0,"answer_tokens":25}},{"leg":"distractor","prompt":"My phone number is 4415550172.","fire":"no"},{"leg":"distractor","prompt":"The meeting is on 2026-06-11.","fire":"no"},{"leg":"distractor","prompt":"Train 9 departs at 18:45 from platform 3.","fire":"no"},{"leg":"distractor","prompt":"Order 66 was executed in 19 BBY.","fire":"no"},{"leg":"distractor","prompt":"Account 123456789012345678901234567890 is active.","fire":"no"},{"leg":"distractor","prompt":"What is the capital of France?","fire":"no"},{"leg":"distractor","prompt":"It takes 5 - 10 business days.","fire":"no"},{"leg":"distractor","prompt":"I work a 9 - 5 job.","fire":"no"},{"leg":"distractor","prompt":"a 4 x 4 truck parked outside","fire":"no"},{"leg":"distractor","prompt":"Are you available 9 - 5?","fire":"no"},{"leg":"distractor","prompt":"dated 2026 - 06 - 11 in the ledger","fire":"no"}]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"experiment":"fr3_explicit_rewrite","vindex":"output/gemma3-4b-q4k-v2.vindex","synonym_top1":[6,6],"phrasing_top1":[6,6],"distractor_fires":[0,3],"cases":[{"w":"seat","bucket":"synonym","expected":"capital","top1":"capital","rank":1},{"w":"metropolis","bucket":"synonym","expected":"capital","top1":"capital","rank":1},{"w":"money","bucket":"synonym","expected":"currency","top1":"currency","rank":1},{"w":"cash","bucket":"synonym","expected":"currency","top1":"currency","rank":1},{"w":"tongue","bucket":"synonym","expected":"language","top1":"language","rank":1},{"w":"speech","bucket":"synonym","expected":"language","top1":"language","rank":1},{"w":"head city","bucket":"phrasing","expected":"capital","top1":"capital","rank":1},{"w":"main city","bucket":"phrasing","expected":"capital","top1":"capital","rank":1},{"w":"legal tender","bucket":"phrasing","expected":"currency","top1":"currency","rank":1},{"w":"unit of money","bucket":"phrasing","expected":"currency","top1":"currency","rank":1},{"w":"spoken language","bucket":"phrasing","expected":"language","top1":"language","rank":1},{"w":"mother tongue","bucket":"phrasing","expected":"language","top1":"language","rank":1},{"w":"banana","bucket":"distractor","expected":"","top1":"none","rank":-1},{"w":"weather","bucket":"distractor","expected":"","top1":"none","rank":-1},{"w":"altitude","bucket":"distractor","expected":"","top1":"none","rank":-1}]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"experiment":"fr3_template_ablation","vindex":"output/gemma3-4b-q4k-v2.vindex","n_entities":6,"held_out_template":"The {r} for {e} would be","layers":[{"layer":6,"acc_k1":0.3889,"acc_k2":0.3333,"acc_k4":0.8333},{"layer":10,"acc_k1":0.3333,"acc_k2":0.3889,"acc_k4":0.3889},{"layer":14,"acc_k1":0.3333,"acc_k2":0.3333,"acc_k4":0.3333},{"layer":20,"acc_k1":0.1667,"acc_k2":0.1111,"acc_k4":0.1667}]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"experiment":"fr_early_exit_decode_projection","vindex":"output/gemma3-4b-q4k-v2.vindex","install_layer":24,"num_layers":34,"fired":12,"full_ms":466.4905,"early_ms":344.9526,"per_token_speedup":1.3523}