AllenInstitute · BrianWhitneyAI · May 21, 2026 · Apr 9, 2026 · Apr 15, 2026 · Apr 15, 2026
@@ -0,0 +1,97 @@
+name: Query Benchmark
+
+on:
+  workflow_dispatch:
+    inputs:
+      base_branch:
+        description: "Base branch (reference)"
+        required: true
+        type: string
+        default: "main"
+      compare_branch:
+        description: "Branch to compare against base"
+        required: true
+        type: string
+
+# Only run one benchmark at a time per base+compare pair
+concurrency:
+  group: benchmark-${{ github.event.inputs.base_branch }}-${{ github.event.inputs.compare_branch }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark:
+    name: Benchmark (${{ github.event.inputs.base_branch }} vs ${{ github.event.inputs.compare_branch }})
+    runs-on: ubuntu-latest
+    # Both branches run sequentially in a single job on the same VM. This is intentional:
+    # if each branch ran in its own job, GitHub could schedule them on different physical
+    # machines with different CPU speeds, cache sizes, or competing workloads. A ~15%
+    # hardware variance between VMs would mask the small regressions we actually care about.
+    # Running back-to-back on the same VM ensures both measurements share the same hardware
+    # baseline, so deltas reflect code differences only.
+    # 60 minutes: large-scale table creation (10M rows) + full query suite × 2 branches
+    timeout-minutes: 60
+
+    steps:
+      # -----------------------------------------------------------------------
+      # Base branch
+      # -----------------------------------------------------------------------
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.base_branch }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: "npm"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Install Playwright Chromium
+        run: npx playwright install chromium --with-deps
+        working-directory: packages/web
+
+      - name: Run benchmark (${{ github.event.inputs.base_branch }})
+        run: node scripts/run-benchmark.js
+        working-directory: packages/web
+        env:
+          BENCHMARK_BRANCH: ${{ github.event.inputs.base_branch }}
+
+      - name: Save base results
+        run: mv packages/web/benchmark-results-*.json /tmp/benchmark-base.json
+
+      # -----------------------------------------------------------------------
+      # Compare branch
+      # -----------------------------------------------------------------------
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.compare_branch }}
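+          # Skip `git clean` so untracked files are left in place across the branch switch.
+          # Note: `npm ci` below still removes and reinstalls node_modules for this branch.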
+          clean: false
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run benchmark (${{ github.event.inputs.compare_branch }})
+        run: node scripts/run-benchmark.js
+        working-directory: packages/web
+        env:
+          BENCHMARK_BRANCH: ${{ github.event.inputs.compare_branch }}
+
+      # -----------------------------------------------------------------------
+      # Compare results
+      # -----------------------------------------------------------------------
+      - name: Generate comparison
+        run: |
+          COMPARE_FILE=$(ls packages/web/benchmark-results-*.json | head -1)
+          node packages/web/scripts/compare-results.js /tmp/benchmark-base.json "$COMPARE_FILE" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: benchmark-results
+          path: |
+            /tmp/benchmark-base.json
+            packages/web/benchmark-results-*.json
+          retention-days: 7
@@ -14,3 +14,6 @@ build
 *.tgz
 .env
 mise.toml
+
+# Benchmark runner output — generated by CI, not source
+packages/web/benchmark-results*.json
diff --git a/packages/core/services/AnnotationService/DatabaseAnnotationService/index.ts b/packages/core/services/AnnotationService/DatabaseAnnotationService/index.ts
@@ -9,6 +9,29 @@ import FileFilter from "../../../entity/FileFilter";
 import IncludeFilter from "../../../entity/FileFilter/IncludeFilter";
 import SQLBuilder from "../../../entity/SQLBuilder";
 
+/**
+ * Builds the SQL used by fetchFilteredValuesForAnnotation — exported so the benchmark
+ * can run the same query.
+ *
+ * Filters that share a name are OR-ed together into a single WHERE condition; one such
+ * condition is added to the builder per distinct filter name.
+ *
+ * @param annotation - Name placed, double-quoted, in the SELECT DISTINCT clause.
+ * @param dataSourceNames - Data source(s) handed to the builder's FROM clause.
+ * @param filters - Optional filters; pass none for an unfiltered distinct-values query.
+ * @returns The SQL string produced by the builder.
+ */
+export function buildDistinctValuesSQL(
+    annotation: string,
+    dataSourceNames: string | string[],
+    filters: FileFilter[] = []
+): string {
+    const builder = new SQLBuilder().select(`DISTINCT "${annotation}"`).from(dataSourceNames);
+
+    // Group with a Map rather than a plain object: on `{}` a filter named e.g.
+    // "constructor" or "__proto__" resolves to an inherited member, which passes a
+    // truthiness check and then throws when spread as if it were an array.
+    const filtersByAnnotation = new Map<string, FileFilter[]>();
+    for (const filter of filters) {
+        const group = filtersByAnnotation.get(filter.name);
+        if (group) {
+            group.push(filter);
+        } else {
+            filtersByAnnotation.set(filter.name, [filter]);
+        }
+    }
+
+    filtersByAnnotation.forEach((appliedFilters) => {
+        builder.where(appliedFilters.map((f) => f.toSQLWhereString()).join(" OR "));
+    });
+    return builder.toSQL();
+}
+
 interface Config {
     databaseService: DatabaseService;
     dataSourceNames: string[];
@@ -112,18 +135,10 @@ export default class DatabaseAnnotationService implements AnnotationService {
             return [];
         }
 
-        const sqlBuilder = new SQLBuilder()
-            .select(`DISTINCT "${annotation}"`)
-            .from(this.dataSourceNames);
-
-        Object.keys(filtersByAnnotation).forEach((annotationToFilter) => {
-            const appliedFilters = filtersByAnnotation[annotationToFilter];
-            sqlBuilder.where(
-                appliedFilters.map((filter) => filter.toSQLWhereString()).join(" OR ")
-            );
-        });
-
-        const rows = await this.databaseService.query(sqlBuilder.toSQL()).promise;
+        const allFilters = Object.values(filtersByAnnotation).flat();
+        const rows = await this.databaseService.query(
+            buildDistinctValuesSQL(annotation, this.dataSourceNames, allFilters)
+        ).promise;
         const rowsSplitByDelimiter = rows
             .flatMap((row) =>
                 isNil(row[annotation])

diff --git a/packages/core/services/DatabaseService/index.ts b/packages/core/services/DatabaseService/index.ts
@@ -81,6 +81,16 @@ export function getParquetFileNameSelectPart(
     return `${getFileNameFromPathExpression(`"${pathColumn}"`)} AS "${PreDefinedColumn.FILE_NAME}"`;
 }
 
+/**
+ * Builds the SQL used by fetchAnnotations — exported so the benchmark can run the same query.
+ *
+ * Selects column_name and data_type for the rows whose table_name equals `tableName`,
+ * leaving out the HIDDEN_UID_ANNOTATION column.
+ *
+ * @param tableName - Interpolated as-is into a single-quoted SQL literal.
+ */
+export function buildFetchAnnotationsSQL(tableName: string): string {
+    const matchesTable = `table_name = '${tableName}'`;
+    const isNotHiddenUid = `column_name != '${HIDDEN_UID_ANNOTATION}'`;
+    const columnsQuery = new SQLBuilder()
+        .select("column_name, data_type")
+        .from('information_schema"."columns');
+    return columnsQuery.where(matchesTable).where(isNotHiddenUid).toSQL();
+}
+
 export async function initializeDuckDB(logLevel: duckdb.LogLevel): Promise<duckdb.AsyncDuckDB> {
     const allBundles = duckdb.getJsDelivrBundles();
 
@@ -1032,12 +1042,7 @@ export default abstract class DatabaseService {
             ?.some((annotation) => !!annotation.description);
         const shouldHaveDescriptions = dataSourceNames.includes(this.SOURCE_METADATA_TABLE);
         if (!hasAnnotations || (!hasDescriptions && shouldHaveDescriptions)) {
-            const sql = new SQLBuilder()
-                .select("column_name, data_type")
-                .from('information_schema"."columns')
-                .where(`table_name = '${aggregateDataSourceName}'`)
-                .where(`column_name != '${HIDDEN_UID_ANNOTATION}'`)
-                .toSQL();
+            const sql = buildFetchAnnotationsSQL(aggregateDataSourceName);
             const rows = await this.query(sql).promise;
             if (isEmpty(rows)) {
                 throw new Error(`Unable to fetch annotations for ${aggregateDataSourceName}`);

diff --git a/packages/core/services/FileService/DatabaseFileService/index.ts b/packages/core/services/FileService/DatabaseFileService/index.ts
@@ -18,6 +18,34 @@ import FileDetail from "../../../entity/FileDetail";
 import SQLBuilder from "../../../entity/SQLBuilder";
 import { Environment, HIDDEN_UID_ANNOTATION } from "../../../constants";
 
+/**
+ * Builds the SQL used by getFiles — exported so the benchmark can run the same query.
+ *
+ * @param dataSourceNames - Data source(s) handed to the builder's FROM clause.
+ * @param fileSet - Supplies the base query builder via toQuerySQLBuilder().
+ * @param from - Page index (0-based), not a row offset.
+ * @param limit - Page size; also the multiplier that turns `from` into a row offset.
+ * @returns The SQL string produced by the builder.
+ */
+export function buildGetFilesSQL(
+    dataSourceNames: string | string[],
+    fileSet: FileSet,
+    from: number,
+    limit: number
+): string {
+    const rowOffset = from * limit;
+    const pagedQuery = fileSet
+        .toQuerySQLBuilder()
+        .from(dataSourceNames)
+        .offset(rowOffset)
+        .limit(limit);
+    return pagedQuery.toSQL();
+}
+
+/**
+ * Builds the SQL used by getCountOfMatchingFiles — exported so the benchmark can run the
+ * same query.
+ *
+ * The count is aliased as "num_files"; the caller in DatabaseFileService reads the first
+ * result row by that same key, so the alias and that key must stay in sync.
+ */
+export function buildGetCountSQL(dataSourceNames: string | string[], fileSet: FileSet): string {
+    const countQuery = fileSet
+        .toQuerySQLBuilder()
+        .select("COUNT(*) AS num_files")
+        .from(dataSourceNames);
+    // Remove sort if present
+    return countQuery.removeOrderBy().toSQL();
+}
+
 interface Config {
     databaseService: DatabaseService;
     dataSourceNames: string[];
@@ -93,13 +121,7 @@ export default class DatabaseFileService implements FileService {
         }
 
         const select_key = "num_files";
-        const sql = fileSet
-            .toQuerySQLBuilder()
-            .select(`COUNT(*) AS ${select_key}`)
-            .from(this.dataSourceNames)
-            // Remove sort if present
-            .removeOrderBy()
-            .toSQL();
+        const sql = buildGetCountSQL(this.dataSourceNames, fileSet);
 
         const rows = await this.databaseService.query(sql).promise;
         return parseInt(rows[0][select_key], 10);
@@ -125,12 +147,12 @@ export default class DatabaseFileService implements FileService {
         if (!this.dataSourceNames.length) {
             return [];
         }
-        const sql = request.fileSet
-            .toQuerySQLBuilder()
-            .from(this.dataSourceNames)
-            .offset(request.from * request.limit)
-            .limit(request.limit)
-            .toSQL();
+        const sql = buildGetFilesSQL(
+            this.dataSourceNames,
+            request.fileSet,
+            request.from,
+            request.limit
+        );
 
         const rows = await this.databaseService.query(sql).promise;
         const env = this.downloadService.getEnvironmentFromUrl();

@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>BFF Benchmark</title>
+</head>
+<body>
+    <p id="status">Starting...</p>
+</body>
+</html>