AllenInstitute · pgarrison · May 29, 2026 · May 19, 2026 · May 6, 2026 · May 28, 2026
@@ -36,8 +36,8 @@ jobs:
     # hardware variance between VMs would mask the small regressions we actually care about.
     # Running back-to-back on the same VM ensures both measurements share the same hardware
     # baseline, so deltas reflect code differences only.
-    # 180 minutes: fixture download + full task suite (including change_grouping on 10m) × 2 branches
-    timeout-minutes: 180
+    # 360 minutes (6 hours) is the longest a single job may run on GitHub-hosted runners
+    timeout-minutes: 360
 
     steps:
       - uses: actions/checkout@v4
@@ -51,7 +51,7 @@ jobs:
         uses: actions/cache@v4
         with:
           path: packages/web/fixtures
-          key: benchmark-fixtures-v1
+          key: benchmark-fixtures-v1.2
 
       - name: Download benchmark fixtures
         if: steps.fixture-cache.outputs.cache-hit != 'true'
@@ -61,6 +61,8 @@ jobs:
           curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet
           curl -fL "$BASE/synthetic-1m.parquet"   -o packages/web/fixtures/synthetic-1m.parquet
           curl -fL "$BASE/synthetic-10m.parquet"  -o packages/web/fixtures/synthetic-10m.parquet
+          curl -fL "$BASE/synthetic-10m-copy.parquet"  -o packages/web/fixtures/synthetic-10m-copy.parquet
+          curl -fL "$BASE/synthetic-20m.parquet"  -o packages/web/fixtures/synthetic-20m.parquet
 
       - uses: actions/setup-node@v4
         with:
@@ -75,7 +77,7 @@ jobs:
         working-directory: packages/web
 
       - name: Run benchmark (${{ github.event.inputs.compare_branch }})
-        run: node scripts/run-regression.js --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }}
+        run: npm run benchmark:regression -- --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }}
         working-directory: packages/web
         env:
           BENCHMARK_BRANCH: ${{ github.event.inputs.compare_branch }}
@@ -92,15 +94,16 @@ jobs:
         run: npm ci
 
       - name: Run benchmark (${{ github.event.inputs.base_branch }})
-        run: node scripts/run-regression.js --skip-build --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }}
+        run: npm run benchmark:regression -- --skip-build --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }}
         working-directory: packages/web
         env:
           BENCHMARK_BRANCH: ${{ github.event.inputs.base_branch }}
 
       - name: Generate comparison
         run: |
-          BASE_FILE=$(ls packages/web/benchmark-results-*.json | head -1)
-          node packages/web/scripts/compare-results.js "$BASE_FILE" /tmp/benchmark-compare.json >> "$GITHUB_STEP_SUMMARY"
+          BASE_FILE=$(ls benchmark-results-*.json | head -1)
+          npm run --silent benchmark:compare -- "$BASE_FILE" /tmp/benchmark-compare.json >> "$GITHUB_STEP_SUMMARY"  # --silent: keep npm's "> script" banner out of the Markdown summary
+        working-directory: packages/web
 
       - name: Upload results
         if: always()

@@ -25,6 +25,8 @@ mkdir -p packages/web/fixtures
 curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet
 curl -fL "$BASE/synthetic-1m.parquet"   -o packages/web/fixtures/synthetic-1m.parquet
 curl -fL "$BASE/synthetic-10m.parquet"  -o packages/web/fixtures/synthetic-10m.parquet
+cp packages/web/fixtures/synthetic-10m.parquet packages/web/fixtures/synthetic-10m-copy.parquet
+curl -fL "$BASE/synthetic-20m.parquet"  -o packages/web/fixtures/synthetic-20m.parquet
 ```
 
 **Run against local fixtures**
@@ -62,7 +64,7 @@ This prints a Markdown table with p50 deltas and regression/improvement badges (
 | Flag | Description |
 |---|---|
 | `--local` | Use fixtures from `packages/web/fixtures/` instead of S3 URLs |
-| `--scale 100k\|1m\|10m` | Run a single fixture size |
+| `--scale 100k\|1m\|10m\|10m+10m\|20m` | Run a single fixture size |
 | `--full` | Run all scales with both cloud and local sources side-by-side |
 | `--iterations N` | Timed iterations per task (default 5) |
 | `--warmup N` | Warmup rounds before timing (default 1) |
@@ -82,10 +84,10 @@ Both branches run on the same machine to eliminate hardware variance — a ~15%
 
 The workflow:
 1. Checks out the compare branch and downloads fixtures from S3 (cached by version)
-2. Runs `run-regression.js` → writes `benchmark-results-<compare>.json`
+2. Runs `run-regression.ts` → writes `benchmark-results-<compare>.json`
 3. Checks out the base branch (without wiping fixtures)
-4. Runs `run-regression.js` → writes `benchmark-results-<base>.json`
-5. Runs `compare-results.js` → posts the Markdown table to the step summary
+4. Runs `run-regression.ts` → writes `benchmark-results-<base>.json`
+5. Runs `compare-results.ts` → posts the Markdown table to the step summary
 
 ---