diff --git a/packages/app/src/components/inference/hooks/useInterpolatedTrendData.ts b/packages/app/src/components/inference/hooks/useInterpolatedTrendData.ts index e35dcd4b..cde86370 100644 --- a/packages/app/src/components/inference/hooks/useInterpolatedTrendData.ts +++ b/packages/app/src/components/inference/hooks/useInterpolatedTrendData.ts @@ -80,6 +80,15 @@ function rowToLightweightPoint(row: BenchmarkRow): InferenceData | null { ...(typeof entry.joules_per_total_token === 'number' ? { measuredJPerTotalToken: { y: entry.joules_per_total_token, roof: false } } : {}), + ...(typeof entry.prefill_avg_power_w === 'number' + ? { measuredPrefillAvgPower: { y: entry.prefill_avg_power_w, roof: false } } + : {}), + ...(typeof entry.decode_avg_power_w === 'number' + ? { measuredDecodeAvgPower: { y: entry.decode_avg_power_w, roof: false } } + : {}), + ...(typeof entry.joules_per_input_token === 'number' + ? { measuredJPerInputToken: { y: entry.joules_per_input_token, roof: false } } + : {}), }; return point; } diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json index ad5e7726..d9a29181 100644 --- a/packages/app/src/components/inference/inference-chart-config.json +++ b/packages/app/src/components/inference/inference-chart-config.json @@ -91,10 +91,20 @@ "y_measuredAvgPower": "measuredAvgPower.y", "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)", "y_measuredAvgPower_title": "Measured Average Power per GPU", + "y_measuredPrefillAvgPower": "measuredPrefillAvgPower.y", + "y_measuredPrefillAvgPower_label": "Measured Prefill Power per GPU (W)", + "y_measuredPrefillAvgPower_title": "Measured Prefill Power per GPU", + "y_measuredDecodeAvgPower": "measuredDecodeAvgPower.y", + "y_measuredDecodeAvgPower_label": "Measured Decode Power per GPU (W)", + "y_measuredDecodeAvgPower_title": "Measured Decode Power per GPU", "y_measuredJPerOutputToken": "measuredJPerOutputToken.y", "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", "y_measuredJPerOutputToken_roofline": "lower_right", + "y_measuredJPerInputToken": "measuredJPerInputToken.y", + "y_measuredJPerInputToken_label": "Measured J per Input Token (J/tok)", + "y_measuredJPerInputToken_title": "Measured Joules per Input Token", + "y_measuredJPerInputToken_roofline": "lower_right", "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)", @@ -193,10 +203,20 @@ "y_measuredAvgPower": "measuredAvgPower.y", "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)", "y_measuredAvgPower_title": "Measured Average Power per GPU", + "y_measuredPrefillAvgPower": "measuredPrefillAvgPower.y", + "y_measuredPrefillAvgPower_label": "Measured Prefill Power per GPU (W)", + "y_measuredPrefillAvgPower_title": "Measured Prefill Power per GPU", + "y_measuredDecodeAvgPower": "measuredDecodeAvgPower.y", + "y_measuredDecodeAvgPower_label": "Measured Decode Power per GPU (W)", + "y_measuredDecodeAvgPower_title": "Measured Decode Power per GPU", "y_measuredJPerOutputToken": "measuredJPerOutputToken.y", "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", "y_measuredJPerOutputToken_roofline": "lower_left", + "y_measuredJPerInputToken": "measuredJPerInputToken.y", + "y_measuredJPerInputToken_label": "Measured J per Input Token (J/tok)", + "y_measuredJPerInputToken_title": "Measured Joules per Input Token", + "y_measuredJPerInputToken_roofline": "lower_left", "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)", diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 19887884..cbf64787 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -231,8 +231,11 @@ export interface InferenceData extends Partial { }); }); +// =========================================================================== +// createChartDataPoint — per-stage measured power / energy (disagg prefill/decode) +// =========================================================================== +describe('createChartDataPoint per-stage measured power fields', () => { + it('emits measuredPrefillAvgPower when prefill_avg_power_w is present', () => { + const e = entry({ prefill_avg_power_w: 920.3 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredPrefillAvgPower).toBeDefined(); + expect(point.measuredPrefillAvgPower!.y).toBe(920.3); + expect(point.measuredPrefillAvgPower!.roof).toBe(false); + }); + + it('emits measuredDecodeAvgPower when decode_avg_power_w is present', () => { + const e = entry({ decode_avg_power_w: 612.1 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredDecodeAvgPower).toBeDefined(); + expect(point.measuredDecodeAvgPower!.y).toBe(612.1); + expect(point.measuredDecodeAvgPower!.roof).toBe(false); + }); + + it('emits measuredJPerInputToken when joules_per_input_token is present', () => { + const e = entry({ joules_per_input_token: 0.27 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredJPerInputToken).toBeDefined(); + expect(point.measuredJPerInputToken!.y).toBe(0.27); + expect(point.measuredJPerInputToken!.roof).toBe(false); + }); + + it('omits all per-stage fields on legacy rows predating per-stage attribution', () => { + // Single-node / pre-disagg runs emit avg_power_w only, no prefill/decode split. + const e = entry({ avg_power_w: 685.5 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredPrefillAvgPower).toBeUndefined(); + expect(point.measuredDecodeAvgPower).toBeUndefined(); + expect(point.measuredJPerInputToken).toBeUndefined(); + }); + + it('emits prefill and decode independently — the disagg per-stage split', () => { + // GB300 disagg: prefill GPUs run compute-bound (higher W) than decode GPUs. + const e = entry({ prefill_avg_power_w: 948, decode_avg_power_w: 631 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredPrefillAvgPower!.y).toBe(948); + expect(point.measuredDecodeAvgPower!.y).toBe(631); + expect(point.measuredPrefillAvgPower!.y).toBeGreaterThan(point.measuredDecodeAvgPower!.y); + }); + + it('preserves a zero per-stage power value (not falsy-coerced away)', () => { + // Same typeof===number gate as total power — 0 W must survive, not be dropped. + const e = entry({ prefill_avg_power_w: 0, decode_avg_power_w: 0 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredPrefillAvgPower).toBeDefined(); + expect(point.measuredPrefillAvgPower!.y).toBe(0); + expect(point.measuredDecodeAvgPower).toBeDefined(); + expect(point.measuredDecodeAvgPower!.y).toBe(0); + }); + + it('carries total and per-stage power together on a full disagg row', () => { + const e = entry({ + avg_power_w: 853, + prefill_avg_power_w: 948, + decode_avg_power_w: 631, + joules_per_input_token: 0.18, + joules_per_output_token: 1.64, + }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredAvgPower!.y).toBe(853); + expect(point.measuredPrefillAvgPower!.y).toBe(948); + expect(point.measuredDecodeAvgPower!.y).toBe(631); + expect(point.measuredJPerInputToken!.y).toBe(0.18); + expect(point.measuredJPerOutputToken!.y).toBe(1.64); + }); +}); + // =========================================================================== // createChartDataPoint — boolean narrowing for prefill/decode dp_attention, is_multinode // =========================================================================== diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts index 980b4c7b..a67857b2 100644 --- a/packages/app/src/lib/chart-utils.ts +++ b/packages/app/src/lib/chart-utils.ts @@ -151,8 +151,11 @@ export const Y_AXIS_METRICS = [ // Measured power / energy (sourced from runner's aggregate_power.py output; // distinct from the spec-sheet TDP-derived jTotal/jOutput/jInput above). 'y_measuredAvgPower', + 'y_measuredPrefillAvgPower', + 'y_measuredDecodeAvgPower', 'y_measuredJPerOutputToken', 'y_measuredJPerTotalToken', + 'y_measuredJPerInputToken', ] as const; export type YAxisMetric = (typeof Y_AXIS_METRICS)[number]; @@ -401,12 +404,21 @@ export function createChartDataPoint( ...(typeof entry.avg_power_w === 'number' ? { measuredAvgPower: { y: entry.avg_power_w, roof: false } } : {}), + ...(typeof entry.prefill_avg_power_w === 'number' + ? { measuredPrefillAvgPower: { y: entry.prefill_avg_power_w, roof: false } } + : {}), + ...(typeof entry.decode_avg_power_w === 'number' + ? { measuredDecodeAvgPower: { y: entry.decode_avg_power_w, roof: false } } + : {}), ...(typeof entry.joules_per_output_token === 'number' ? { measuredJPerOutputToken: { y: entry.joules_per_output_token, roof: false } } : {}), ...(typeof entry.joules_per_total_token === 'number' ? { measuredJPerTotalToken: { y: entry.joules_per_total_token, roof: false } } : {}), + ...(typeof entry.joules_per_input_token === 'number' + ? { measuredJPerInputToken: { y: entry.joules_per_input_token, roof: false } } + : {}), }; } @@ -569,8 +581,11 @@ export const calculateRoofline = ( | `jOutput.y` | `jInput.y` | `measuredAvgPower.y` + | `measuredPrefillAvgPower.y` + | `measuredDecodeAvgPower.y` | `measuredJPerOutputToken.y` - | `measuredJPerTotalToken.y`, + | `measuredJPerTotalToken.y` + | `measuredJPerInputToken.y`, rooflineDirection: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right', ): InferenceData[] => { const pointsForRoofline = points.map((p) => { @@ -642,8 +657,11 @@ export function computeAllRooflines( | `jOutput.y` | `jInput.y` | `measuredAvgPower.y` + | `measuredPrefillAvgPower.y` + | `measuredDecodeAvgPower.y` | `measuredJPerOutputToken.y` - | `measuredJPerTotalToken.y`, + | `measuredJPerTotalToken.y` + | `measuredJPerInputToken.y`, rooflineDirection, ); } @@ -688,8 +706,11 @@ export function markRooflinePoints( if (newPoint.jOutput) newPoint.jOutput.roof = false; if (newPoint.jInput) newPoint.jInput.roof = false; if (newPoint.measuredAvgPower) newPoint.measuredAvgPower.roof = false; + if (newPoint.measuredPrefillAvgPower) newPoint.measuredPrefillAvgPower.roof = false; + if (newPoint.measuredDecodeAvgPower) newPoint.measuredDecodeAvgPower.roof = false; if (newPoint.measuredJPerOutputToken) newPoint.measuredJPerOutputToken.roof = false; if (newPoint.measuredJPerTotalToken) newPoint.measuredJPerTotalToken.roof = false; + if (newPoint.measuredJPerInputToken) newPoint.measuredJPerInputToken.roof = false; for (const chartDefYKey of Y_AXIS_METRICS) { const rooflinePoints = computedRooflines[hwKey]?.[chartDefYKey]; @@ -751,6 +772,13 @@ export function markRooflinePoints( newPoint.jInput.roof = onCurrentRoofline; } else if (chartDefYKey === 'y_measuredAvgPower' && newPoint.measuredAvgPower) { newPoint.measuredAvgPower.roof = onCurrentRoofline; + } else if ( + chartDefYKey === 'y_measuredPrefillAvgPower' && + newPoint.measuredPrefillAvgPower + ) { + newPoint.measuredPrefillAvgPower.roof = onCurrentRoofline; + } else if (chartDefYKey === 'y_measuredDecodeAvgPower' && newPoint.measuredDecodeAvgPower) { + newPoint.measuredDecodeAvgPower.roof = onCurrentRoofline; } else if ( chartDefYKey === 'y_measuredJPerOutputToken' && newPoint.measuredJPerOutputToken @@ -758,6 +786,8 @@ export function markRooflinePoints( newPoint.measuredJPerOutputToken.roof = onCurrentRoofline; } else if (chartDefYKey === 'y_measuredJPerTotalToken' && newPoint.measuredJPerTotalToken) { newPoint.measuredJPerTotalToken.roof = onCurrentRoofline; + } else if (chartDefYKey === 'y_measuredJPerInputToken' && newPoint.measuredJPerInputToken) { + newPoint.measuredJPerInputToken.roof = onCurrentRoofline; } } finalProcessedData.push(newPoint);