SemiAnalysisAI · adibarra · May 3, 2026 · May 3, 2026
@@ -0,0 +1,114 @@
+import { describe, it, expect } from 'vitest';
+
+import type { GithubArtifact } from '@/lib/github-artifacts';
+import { type EvalArtifactConfig, findEvalSampleArtifact } from '@/lib/eval-samples-live';
+
+// Test helper: stubs a `GithubArtifact` that carries only `id`, `name` and a fake
+// `archive_download_url`; cast with `as` since just these three fields are populated.
+function makeArtifact(name: string, id = 1): GithubArtifact {
+  const archive_download_url = `https://example.com/${name}.zip`;
+  const stub = { id, name, archive_download_url };
+  return stub as GithubArtifact;
+}
+
+const baseConfig: EvalArtifactConfig = {
+  model: 'dsr1', // baseline shared by the cases below; tests override fields via spread
+  framework: 'sglang', // the disagg cases override this with 'mori-sglang'
+  hardware: 'mi355x',
+  precision: 'fp4',
+  specMethod: 'mtp',
+  disagg: false, // flipped to true by the disagg cases
+  conc: 128, // corresponds to the `conc128` segment of the single-conc artifact names
+};
+
+describe('findEvalSampleArtifact', () => {
+  it('matches a single-conc non-disagg artifact', () => {
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang_tp8-ep1-dpafalse_disagg-false_spec-mtp_conc128_mi355x-amds_01',
+      ),
+    ];
+    expect(findEvalSampleArtifact(artifacts, baseConfig)?.id).toBe(1);
+  });
+
+  it('accepts the legacy `sglang-disagg` alias when the config framework is `mori-sglang`', () => {
+    // Eval rows are normalized via FRAMEWORK_ALIASES (sglang-disagg → mori-sglang),
+    // but artifact names keep the raw alias. The matcher must accept either.
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang-disagg_prefill-tp8-ep1-dpfalse-nw1_decode-tp8-ep1-dpfalse-nw2_disagg-true_spec-mtp_conc64x128x256_mi355x-amds_08',
+      ),
+    ];
+    const result = findEvalSampleArtifact(artifacts, {
+      ...baseConfig,
+      framework: 'mori-sglang',
+      disagg: true,
+      conc: 128,
+    });
+    expect(result?.id).toBe(1);
+  });
+
+  it('matches a conc value embedded in an x-separated list (disagg artifacts)', () => {
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang-disagg_prefill-tp8-ep8-dptrue-nw2_decode-tp8-ep8-dptrue-nw1_disagg-true_spec-mtp_conc1024x2048x4096_mi355x-amds_06',
+      ),
+    ];
+    const result = findEvalSampleArtifact(artifacts, {
+      ...baseConfig,
+      framework: 'mori-sglang',
+      disagg: true,
+      conc: 2048,
+    });
+    expect(result?.id).toBe(1);
+  });
+
+  it('rejects when the requested conc is not in the list', () => {
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang-disagg_prefill-tp8-ep8-dptrue-nw2_decode-tp8-ep8-dptrue-nw1_disagg-true_spec-mtp_conc1024x2048x4096_mi355x-amds_06',
+      ),
+    ];
+    const result = findEvalSampleArtifact(artifacts, {
+      ...baseConfig,
+      framework: 'mori-sglang',
+      disagg: true,
+      conc: 64,
+    });
+    expect(result).toBeNull();
+  });
+
+  it('avoids substring conc collisions (conc=12 must not match conc128)', () => {
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang_tp8-ep1-dpafalse_disagg-false_spec-mtp_conc128_mi355x-amds_01',
+      ),
+    ];
+    const result = findEvalSampleArtifact(artifacts, { ...baseConfig, conc: 12 });
+    expect(result).toBeNull();
+  });
+
+  it('skips eval_results_ and eval_gpu_metrics_ artifacts', () => {
+    const artifacts = [
+      makeArtifact('eval_results_all'),
+      makeArtifact('eval_gpu_metrics_dsr1_8k1k_fp4_sglang_spec-mtp_conc128_mi355x-amds'),
+    ];
+    expect(findEvalSampleArtifact(artifacts, baseConfig)).toBeNull();
+  });
+
+  it('prefers artifacts whose disagg token matches the config', () => {
+    const artifacts = [
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang_tp8-ep1_disagg-false_spec-mtp_conc128_mi355x-amds_01',
+        1,
+      ),
+      makeArtifact(
+        'eval_dsr1_8k1k_dsr1_8k1k_fp4_sglang_tp8-ep1_disagg-true_spec-mtp_conc128_mi355x-amds_02',
+        2,
+      ),
+    ];
+    // id 1 is older, so only the disagg preference (not the newest-id fallback) can pick it.
+    expect(findEvalSampleArtifact(artifacts, { ...baseConfig, disagg: false })?.id).toBe(1);
+    expect(findEvalSampleArtifact(artifacts, { ...baseConfig, disagg: true })?.id).toBe(2);
+  });
+});
@@ -8,6 +8,8 @@
  * ingest does. No caching — same policy as `/api/unofficial-run`, since GHA
  * artifacts can change while a workflow is still running.
  */
+import { resolveFrameworkAliasesInString } from '@semianalysisai/inferencex-constants/framework-aliases';
+
 import {
   type GithubArtifact,
   downloadGithubArtifact,
@@ -30,6 +32,17 @@ export interface EvalArtifactConfig {
   conc: number | null;
 }
 
+/**
+ * Tell whether `targetConc` appears in the artifact name's conc segment,
+ * which is either a lone `_conc<N>_` (non-disagg: one concurrency) or an
+ * x-joined list `_conc<N>x<N>x<...>_` (disagg: several packed in one zip).
+ * Entries are compared whole, so conc 12 does not match `_conc128_`.
+ */
+function artifactConcMatches(artifactName: string, targetConc: number): boolean {
+  const concList = /_conc(\d+(?:x\d+)*)_/.exec(artifactName)?.[1];
+  return concList === undefined ? false : concList.split('x').includes(String(targetConc));
+}
+
 /**
  * Pick the per-config eval artifact matching `config` from a run's artifact list.
  *
@@ -40,6 +53,15 @@ export interface EvalArtifactConfig {
  * `EvalRow`, so when multiple artifacts differ only in sequence length we pick
  * the highest-id (most recent) match. Excludes the aggregate (`eval_results_all`)
  * and gpu-metrics artifacts which share the `eval_` prefix but don't carry samples.
+ *
+ * Two normalization quirks the matcher has to undo:
+ * - The eval row's `framework` is canonicalized via `FRAMEWORK_ALIASES`
+ *   (e.g. `sglang-disagg` → `mori-sglang`), but the artifact name keeps the
+ *   raw alias. We canonicalize the artifact name via `resolveFrameworkAliasesInString`
+ *   before comparing.
+ * - Disagg artifacts pack multiple concurrencies into one zip and encode them
+ *   as `conc<N>x<N>x<N>`, so we parse the conc segment as an x-separated list
+ *   and check membership instead of requiring an exact `_conc<N>_` token.
  */
 export function findEvalSampleArtifact(
   artifacts: GithubArtifact[],
@@ -57,15 +79,18 @@ export function findEvalSampleArtifact(
     `_${config.hardware}-`,
     `_spec-${config.specMethod}_`,
   ];
-  if (config.conc !== null) required.push(`_conc${config.conc}_`);
   // Preferred token — used as a tiebreaker when more than one artifact matches.
   const preferredDisagg = `_disagg-${config.disagg ? 'true' : 'false'}_`;
 
   const matches = artifacts.filter((a) => {
-    const n = a.name.toLowerCase();
+    // Canonicalize legacy framework substrings (e.g. `sglang-disagg` → `mori-sglang`)
+    // so the framework token matches what the eval row was normalized to.
+    const n = resolveFrameworkAliasesInString(a.name.toLowerCase());
     if (!n.startsWith('eval_')) return false;
     if (n.startsWith('eval_results_') || n.startsWith('eval_gpu_metrics_')) return false;
-    return required.every((t) => n.includes(t.toLowerCase()));
+    if (!required.every((t) => n.includes(t.toLowerCase()))) return false;
+    if (config.conc !== null && !artifactConcMatches(n, config.conc)) return false;
+    return true;
   });
   if (matches.length === 0) return null;
   // Prefer artifacts whose disagg flag matches the row, then fall back to newest.