diff --git a/torchci/components/benchmark/v3/components/common/StickyBar.tsx b/torchci/components/benchmark/v3/components/common/StickyBar.tsx new file mode 100644 index 0000000000..71f36ab036 --- /dev/null +++ b/torchci/components/benchmark/v3/components/common/StickyBar.tsx @@ -0,0 +1,96 @@ +import { Box } from "@mui/system"; +import { useEffect, useRef, useState } from "react"; + +export type StickyBarProps = { + children: React.ReactNode; + height?: number; + offset: number; + zIndex?: number; + onMount?: (h: number) => void; + onUnmount?: (h: number) => void; + /** Horizontal alignment of content inside the bar */ + align?: "left" | "center" | "right"; + /** Should children keep their natural width ("fit") or stretch ("full") */ + contentMode?: "fit" | "full"; +}; + +export const StickyBar: React.FC = ({ + children, + height = 48, + offset, + zIndex = 900, + onMount, + onUnmount, + align = "left", + contentMode = "fit", +}) => { + const ref = useRef(null); + const [isSticky, setIsSticky] = useState(false); + + // Let parent know about mount/unmount (for stacking offset logic) + useEffect(() => { + onMount?.(height); + return () => onUnmount?.(height); + }, [height, onMount, onUnmount]); + + useEffect(() => { + const observer = new IntersectionObserver( + ([entry]) => { + // < 0.99 + setIsSticky(entry.intersectionRatio < 0.99); + }, + { + threshold: Array.from({ length: 101 }, (_, i) => i / 100), + // 0,0.01,0.02,...,1, + } + ); + + if (ref.current) observer.observe(ref.current); + return () => observer.disconnect(); + }, []); + + const justify = + align === "center" + ? "center" + : align === "right" + ? "flex-end" + : "flex-start"; + + return ( + <> + {/* Sentinel keeps layout height stable */} +
+ {/* Outer bar: full width, sticky */} + + {/* Inner container: controls how children size themselves */} + *": { flex: "0 0 auto", minWidth: "auto" }, // don’t stretch children + }} + > + {children} + + +
+ + ); +}; diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkChartSection.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/BenchmarkChartSection.tsx similarity index 51% rename from torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkChartSection.tsx rename to torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/BenchmarkChartSection.tsx index 28776f71e5..992dad73c6 100644 --- a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkChartSection.tsx +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/BenchmarkChartSection.tsx @@ -1,13 +1,13 @@ import { Paper, Typography } from "@mui/material"; import { Box } from "@mui/system"; import { useMemo } from "react"; +import BenchmarkTimeSeriesChartGroup from "./components/BenchmarkTimeSeriesChartGroup"; import { BenchmarkChartSectionConfig, BenchmarkTimeSeriesInput, makeGroupKeyAndLabel, passesFilter, -} from "../helper"; -import BenchmarkTimeSeriesChartGroup from "./BenchmarkTimeSeriesChartGroup"; +} from "./helper"; const styles = { container: { @@ -52,7 +52,9 @@ export default function BenchmarkChartSection({ gi, chartSectionConfig.groupByFields ); - if (!m.has(key)) m.set(key, { key, labels, items: [] }); + if (!m.has(key)) { + m.set(key, { key, labels, items: [] }); + } m.get(key)!.items.push(s); } return m; @@ -63,34 +65,37 @@ export default function BenchmarkChartSection({ } return ( - - {Array.from(groupMap.entries()).map(([key, data]) => { - if (!data) return null; - const op = chartSectionConfig.chartGroup?.renderOptions; - const title = data.labels.join(" "); + + Time Series Chart Section + + {Array.from(groupMap.entries()).map(([key, data]) => { + if (!data) return null; + const op = chartSectionConfig.chartGroup?.renderOptions; + 
const title = data.labels.join(" "); - let renderOptions = chartSectionConfig.chartGroup?.renderOptions; - if (op && op.pass_section_title) { - renderOptions = { - ...renderOptions, - titleSuffix: `/${title}`, - }; - } - return ( - - - {title.toUpperCase()} - { - onChange?.(payload); - }} - /> - - - ); - })} + let renderOptions = chartSectionConfig.chartGroup?.renderOptions; + if (op && op.pass_section_title) { + renderOptions = { + ...renderOptions, + titleSuffix: `/${title}`, + }; + } + return ( + + + {title.toUpperCase()} + { + onChange?.(payload); + }} + /> + + + ); + })} + ); } diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTable.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTable.tsx new file mode 100644 index 0000000000..f3b2cb51e5 --- /dev/null +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTable.tsx @@ -0,0 +1,87 @@ +import { Typography } from "@mui/material"; +import { DataGrid, GridColDef, GridRowModel } from "@mui/x-data-grid"; +import { useMemo } from "react"; +import { ComparisonTableConfig } from "../../../helper"; +import { getComparisionTableConlumnRendering } from "./ComparisonTableColumnRendering"; +import { SnapshotRow, ToComparisonTableRow } from "./ComparisonTableHelpers"; + +export function ComparisonTable({ + data, + lWorkflowId, + rWorkflowId, + config, + columnOrder, + title = "Group", +}: { + data: SnapshotRow[]; + lWorkflowId: string | null; + rWorkflowId: string | null; + config: ComparisonTableConfig; + columnOrder?: string[]; // optional preferred ordering of columns + title?: string; +}) { + // group raw data into rows, 
each row contains all values across workflowIds + const rows: GridRowModel[] = useMemo(() => { + return ToComparisonTableRow(config, data); + }, [data]); + + // iterate the column map in row data, and get all column names + const allColumns = useMemo(() => { + const s = new Set(); + rows.forEach((r) => + Object.values(r.byWorkflow).forEach((cols) => { + Object.keys(cols ?? {}).forEach((k) => s.add(k)); + }) + ); + const auto = Array.from(s).sort(); + if (!columnOrder || columnOrder.length === 0) return auto; + const head = columnOrder.filter((c) => s.has(c)); + const tail = auto.filter((c) => !head.includes(c)); + return [...head, ...tail]; + }, [rows, columnOrder]); + + // Form the columns + const columns: GridColDef[] = useMemo( + () => + getComparisionTableConlumnRendering( + allColumns, + lWorkflowId, + rWorkflowId, + config + ), + [allColumns, lWorkflowId, rWorkflowId, title] + ); + + return ( + <> + {title.toUpperCase()} + + {lWorkflowId} - {rWorkflowId} + + r.id} + sx={{ + "& .MuiDataGrid-cell": { + py: 0, // less vertical padding + fontSize: "0.75rem", + }, + "& .MuiDataGrid-columnHeaders": { + minHeight: 32, + maxHeight: 32, + lineHeight: "32px", + fontSize: "0.75rem", + }, + "& .MuiDataGrid-row": { + minHeight: 32, + maxHeight: 32, + }, + }} + hideFooter + /> + + ); +} diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx new file mode 100644 index 0000000000..0cf9fb38e6 --- /dev/null +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx @@ 
-0,0 +1,177 @@ +import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; +import { IconButton, Tooltip, Typography } from "@mui/material"; +import { Box } from "@mui/system"; +import { + GridColDef, + GridRenderCellParams, + GridRowModel, +} from "@mui/x-data-grid"; +import { + BenchmarkComparisonPolicyConfig, + ComparisonResult, + evaluateComparison, +} from "components/benchmark/v3/configs/helpers/RegressionPolicy"; +import { ComparisonTableConfig } from "../../../helper"; +import { asNumber, valOf } from "./ComparisonTableHelpers"; + +/** + * + * @param allColumns + * @param lWorkflowId + * @param rWorkflowId + * @returns + */ +export function getComparisionTableConlumnRendering( + columnsFields: string[], + lWorkflowId: string | null, + rWorkflowId: string | null, + config: ComparisonTableConfig +): GridColDef[] { + const nameCol: GridColDef = { + field: "name", + headerName: "Name", + flex: 1.2, + sortable: false, + filterable: false, + renderCell: (p) => {p.row.name}, + }; + const metricCols: GridColDef[] = columnsFields.map((field) => ({ + field, + headerName: field, + flex: 1, + sortable: false, + filterable: false, + renderCell: (params: GridRenderCellParams) => ( + + ), + })); + const labelCol: GridColDef = { + field: "label", + headerName: "Label", + width: 10, + sortable: false, + filterable: false, + renderCell: (p) => ( + + + + + + ), + }; + return [nameCol, ...metricCols, labelCol]; +} + +/** Colors */ +const VIOLATE_RULE_COLOR = "#ffebee"; // red[50] +const IMPROVEMENT_COLOR = "#e8f5e9"; // green[50] + +/** + * + * @returns + */ +export function ComparisonTableValueCell({ + field, + row, + lWorkflowId, + rWorkflowId, + config, +}: { + field: string; + row: GridRowModel; + lWorkflowId: string | null; + rWorkflowId: string | null; + comparisonTargetField?: string; + config?: ComparisonTableConfig; +}) { + const L = valOf( + lWorkflowId + ? row.byWorkflow[lWorkflowId]?.[field]?.data?.[0] ?? 
+ row.byWorkflow[lWorkflowId]?.[field] + : undefined + ); + const R = valOf( + rWorkflowId + ? row.byWorkflow[rWorkflowId]?.[field]?.data?.[0] ?? + row.byWorkflow[rWorkflowId]?.[field] + : undefined + ); + + const fmt = (v: any) => + v == null + ? "—" + : typeof v === "number" + ? Number(v).toFixed(2) + : String(v.toFixed(2)); + + const ln = asNumber(L); + const rn = asNumber(R); + + // get comparison policy for the field + const targetPolicyField = config?.comparisonPolicyTargetField; + let comparisonPolicy: BenchmarkComparisonPolicyConfig | undefined = undefined; + if (targetPolicyField && config?.comparisonPolicy) { + const fieldValue = row[targetPolicyField]; + comparisonPolicy = fieldValue + ? config?.comparisonPolicy[fieldValue] + : undefined; + } + + // evaluate comparison + const result = evaluateComparison( + comparisonPolicy?.target, + ln, + rn, + comparisonPolicy + ); + + let bgColor = ""; + switch (result.verdict) { + case "good": + bgColor = IMPROVEMENT_COLOR; + break; + case "regression": + bgColor = VIOLATE_RULE_COLOR; + break; + case "neutral": + default: + break; + } + + const text = + L == null && R == null + ? "N/A" + : L == null + ? `N/A→${fmt(R)}` + : R == null + ? `${fmt(L)}→N/A` + : fmt(L) === fmt(R) + ? 
`${fmt(L)}` + : `${fmt(L)}→${fmt(R)}`; + + return ( + + + {text} + + + ); +} + +function renderComparisonResult(result: ComparisonResult) { + return ( + + {Object.entries(result).map(([key, value]) => ( + + {key}: {String(value)} + + ))} + + ); +} diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableHelpers.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableHelpers.tsx new file mode 100644 index 0000000000..fa7c004e1b --- /dev/null +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableHelpers.tsx @@ -0,0 +1,62 @@ +import { ComparisonTableConfig } from "../../../helper"; + +type GridRowModel = { + id: string; + label: string; + name: string; + metric: string; + byWorkflow: Record; + sampleInfo: any; +}; + +// used when find unique rows +const EXCLUDE_KEYS = ["workflow_id", "commit", "branch"]; +function getGroupKeyAndLabel(gi: any) { + const keys = Object.keys(gi ?? {}) + .filter((k) => !EXCLUDE_KEYS.includes(k)) + .sort(); + const key = keys.map((k) => `${k}=${String(gi?.[k])}`).join("|"); + const label = keys.map((k) => String(gi?.[k])).join(" · "); + return { key, label, metric: String(gi?.metric ?? "") }; +} + +export type RowCellObj = { + value: number | string | null | undefined; + [k: string]: any; +}; +export type SnapshotRow = { + group_info: any; + sub_keys: string[]; + group_keys: string[]; + rows: RowCellObj[]; +}; + +/** Helpers */ +export const asNumber = (v: unknown) => (typeof v === "number" ? v : undefined); +export const valOf = (cell?: RowCellObj) => (cell ? 
/**
 * Pivots benchmark snapshot records into comparison-table rows.
 *
 * Records are bucketed by the key that `getGroupKeyAndLabel` derives from
 * their `group_info`; every bucket becomes one row. Within a row, each
 * record's `data` payload (or `{}` when absent) is stored under
 * `byWorkflow[<workflow_id as string>]`, so a later record with the same
 * bucket key and workflow id replaces the earlier one.
 *
 * The static row fields (`id`, `label`, `name`, `sampleInfo`, plus every
 * `group_info` field spread onto the row) are taken from the FIRST record
 * seen for a bucket.
 *
 * @param config table config; when `nameKeys` is set, the row name is the
 *   listed `group_info` values joined with " · ", otherwise the label is used
 * @param data iterable of records shaped like `{ group_info, data }`;
 *   `null`/`undefined` is treated as empty
 * @returns the rows in first-seen bucket order
 */
export function ToComparisonTableRow(config: ComparisonTableConfig, data: any) {
  const rowsByKey = new Map<string, GridRowModel>();

  for (const record of data ?? []) {
    const info = record.group_info ?? {};
    const { key, label } = getGroupKeyAndLabel(info);

    let row = rowsByKey.get(key);
    if (row === undefined) {
      // First record of this bucket defines the row's static fields.
      let name = label;
      if (config?.nameKeys) {
        name = config.nameKeys.map((field) => info[field]).join(" · ");
      }
      row = {
        ...info,
        id: key,
        label,
        byWorkflow: {},
        sampleInfo: info,
        name,
      };
      rowsByKey.set(key, row!);
    }

    const workflowId = String(info?.workflow_id ?? "");
    row!.byWorkflow[workflowId] = record.data ?? {};
  }

  return [...rowsByKey.values()];
}
BenchmarkTimeSeriesComparisonTableSection({ + data = [], + tableSectionConfig, + onChange, +}: { + data?: any[]; + tableSectionConfig: BenchmarkComparisonTableSectionConfig; + onChange?: (payload: any) => void; +}) { + // Sticky bar offset + const [barOffset, setBarOffset] = useState(-20); + const handleMount = (h: number) => setBarOffset((prev) => prev + h); + const handleUnmount = (h: number) => setBarOffset((prev) => prev - h); + + // Filter data based on the table config + const filtered = useMemo(() => { + if (!data) return []; + return data.filter((s) => + passesFilter(s.group_info || {}, tableSectionConfig.filterByFieldValues) + ); + }, [data, tableSectionConfig.filterByFieldValues]); + + // Group data based on the table config + const groupMap = useMemo( + () => toGroupKeyMap(filtered, tableSectionConfig.groupByFields), + [filtered, tableSectionConfig.groupByFields] + ); + + const workflowInfos: any[] = useMemo( + () => toSortedWorkflowIdMap(filtered), + [ + filtered, + tableSectionConfig.groupByFields, + tableSectionConfig.filterByFieldValues, + ] + ); + + const [lWorkflowId, setLlWorkflowId] = useState( + workflowInfos.length > 0 ? workflowInfos[0].workflow_id : null + ); + const [rWorkflowId, setRWorkflowId] = useState( + workflowInfos.length > 0 + ? 
workflowInfos[workflowInfos.length - 1].workflow_id + : null + ); + + const onSliderChange = (next: [string, string]) => { + setLlWorkflowId(next[0]); + setRWorkflowId(next[1]); + }; + + if (!filtered || filtered.length == 0) { + return <>; + } + + return ( + + Time Series Comparison Section + + + + + + {Array.from(groupMap.entries()).map(([key, tableData]) => { + if (!tableData) return null; + const title = tableData.labels.join(" "); + return ( + + + + + + ); + })} + + + ); +} diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTableSlider.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTableSlider.tsx new file mode 100644 index 0000000000..17fdea4be7 --- /dev/null +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTableSlider.tsx @@ -0,0 +1,119 @@ +import styled from "@emotion/styled"; +import { Box, Chip, Paper, Slider, Stack, Typography } from "@mui/material"; +import { useCallback, useMemo, useState } from "react"; +import { shortSha } from "../../helper"; + +export type WorkflowMetaInfo = { + workflow_id: string; + commit: string; + branch: string; + [k: string]: any; +}; + +const BenchmarkSlider = styled(Slider)(({ theme }) => ({ + "& .MuiSlider-valueLabelLabel": { + whiteSpace: "pre-line", // <- allow \n + fontSize: 12, + lineHeight: 1.25, + padding: 0, + display: "block", // makes JSX work too + }, +})); + +const fmtTs = (ts?: string) => { + if (!ts) return "—"; + const d = new Date(ts); + return isNaN(d.getTime()) ? 
ts : d.toLocaleString(); +}; + +export function BenchmarkTimeSeriesComparisonTableSlider({ + workflows, + onChange, +}: { + workflows: WorkflowMetaInfo[]; + onChange: (next: [string, string]) => void; +}) { + // sort & map + const { ids, byId } = useMemo(() => { + const byId: Record = {}; + workflows.forEach((it) => (byId[it.workflow_id] = it)); + const ids = workflows.map((it) => it.workflow_id); + return { ids, byId }; + }, [workflows]); + + // Controlled slider range (indices) + const [range, setRange] = useState<[number, number]>(() => { + const n = workflows.length; + return n >= 2 ? [0, n - 1] : [0, 0]; + }); + + // update range when workflows change + useMemo(() => { + const n = workflows.length; + if (n >= 2) { + setRange([0, n - 1]); + } else { + setRange([0, 0]); + } + }, [workflows]); + + function rangeLabelFormat(wfi: any) { + const wf = byId[ids[wfi as number]]; + if (!wf) return "-"; + const commit = wf.commit ? shortSha(wf.commit) : ""; + return `${wf.workflow_id} (commit: ${commit})`; + } + + // render slider tick labels when slider is hovered + function valueLabelFormat(idx: number) { + const wf = byId[ids[idx as number]]; + if (!wf) return ""; + + return ( + + WorkflowId: {wf.workflow_id} +
Commit: {shortSha(wf.commit)}
+
{fmtTs(wf.ts)}
+
+ ); + } + + const handleChange = useCallback( + (_event: Event, value: number | number[], _activeThumb: number) => { + if (Array.isArray(value) && value.length === 2) { + const [a, b] = value[0] <= value[1] ? value : [value[1], value[0]]; + setRange([a, b]); + const l = ids[a]; + const r = ids[b]; + console.log("onChange", l, r); + onChange([l, r]); + } + }, + [onChange] + ); + + const minWidth = Math.max(200, 50 * ids.length); + return ( + + + Select L / R Data + + + + + valueLabelFormat(idx)} + disableSwap + /> + + + + + ); +} diff --git a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx index 7932e9725e..62d1060409 100644 --- a/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx +++ b/torchci/components/benchmark/v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx @@ -1,3 +1,5 @@ +import { BenchmarkComparisonPolicyConfig } from "components/benchmark/v3/configs/helpers/RegressionPolicy"; + export type BenchmarkChartSectionConfig = { titleMapping?: Record; groupByFields: string[]; @@ -5,6 +7,24 @@ export type BenchmarkChartSectionConfig = { chartGroup: ChartGroupConfig; }; +export type BenchmarkComparisonTableSectionConfig = { + titleMapping?: Record; + groupByFields: string[]; + filterByFieldValues?: Record>; + tableConfig: ComparisonTableConfig; +}; + +export type ComparisonTableConfig = { + titleMapping?: Record; + nameKeys?: string[]; // the field name used to render the name of the row, if not set, use all groupinfo labels + renderOptions?: {}; + // indicates the field to use for comparison policy map + comparisonPolicyTargetField?: string; + comparisonPolicy?: { + [key: string]: BenchmarkComparisonPolicyConfig; + }; +}; + export type ChartGroupConfig = { type: "line"; titleMapping?: Record; @@ -64,3 +84,53 @@ export function makeGroupKeyAndLabel( const 
/**
 * Collects the distinct workflows referenced by `data` and returns them in
 * ascending workflow-id order.
 *
 * Ordering is decided ONCE for the whole list: numeric when every id consists
 * only of decimal digits, otherwise lexicographic (`localeCompare`). Choosing
 * per pair would not be a consistent ordering for mixed lists
 * ("9" < "10" numerically, "10" < "1a" and "1a" < "9" lexicographically), and
 * `Array.prototype.sort` gives no guarantees for an inconsistent comparator.
 *
 * When a workflow id occurs more than once, the commit/branch of the last
 * occurrence are kept.
 *
 * @param data data list; the group_info of every entry must have workflow_id
 *   (commit and branch are copied through as-is)
 * @returns one `{ workflow_id, label, commit, branch }` record per distinct
 *   id, sorted ascending
 * @throws Error when an entry has no group_info or a falsy workflow_id
 */
export function toSortedWorkflowIdMap(data: any[]) {
  const workflowIdMap = new Map<string, any>();
  for (const d of data) {
    if (!d.group_info) {
      throw new Error(
        "[toSortedWorkflowIdMap]group_info is missing when try to form the workflowIdMap "
      );
    }
    if (!d.group_info.workflow_id) {
      throw new Error(
        "[toSortedWorkflowIdMap]workflow_id is missing when try to form the workflowIdMap "
      );
    }
    const id = String(d.group_info.workflow_id);
    workflowIdMap.set(id, {
      workflow_id: id,
      label: id,
      commit: d.group_info.commit,
      branch: d.group_info.branch,
    });
  }

  const workflows = Array.from(workflowIdMap.values());
  const allNumeric = workflows.every((w) => /^\d+$/.test(w.workflow_id));
  return workflows.sort((a, b) =>
    allNumeric
      ? Number(a.workflow_id) - Number(b.workflow_id)
      : a.workflow_id.localeCompare(b.workflow_id)
  );
}
id.slice(0, 7) : id) : "—"; diff --git a/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutBenchmarkTimeSeriesChartSection.tsx b/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutBenchmarkTimeSeriesChartSection.tsx deleted file mode 100644 index c3aeb7b272..0000000000 --- a/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutBenchmarkTimeSeriesChartSection.tsx +++ /dev/null @@ -1,27 +0,0 @@ -import BenchmarkChartSection from "../components/benchmarkTimeSeries/components/BenchmarkChartSection"; -import { - BenchmarkChartSectionConfig, - BenchmarkTimeSeriesInput, -} from "../components/benchmarkTimeSeries/helper"; - -export default function FanoutBenchmarkTimeSeriesChartSection({ - data = [], - config, - onChange, -}: { - data?: BenchmarkTimeSeriesInput[]; - config: any; - onChange?: (payload: any) => void; -}) { - return ( -
- { - onChange?.(payload); - }} - /> -
- ); -} diff --git a/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutComponents.tsx b/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutComponents.tsx new file mode 100644 index 0000000000..ad5a914972 --- /dev/null +++ b/torchci/components/benchmark/v3/components/dataRender/fanout/FanoutComponents.tsx @@ -0,0 +1,47 @@ +import BenchmarkChartSection from "../components/benchmarkTimeSeries/BenchmarkChartSection"; +import BenchmarkTimeSeriesComparisonTableSection from "../components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTableSection"; +import { + BenchmarkChartSectionConfig, + BenchmarkTimeSeriesInput, +} from "../components/benchmarkTimeSeries/helper"; + +export function FanoutBenchmarkTimeSeriesChartSection({ + data = [], + config, + onChange, +}: { + data?: BenchmarkTimeSeriesInput[]; + config: any; + onChange?: (payload: any) => void; +}) { + return ( +
+ { + onChange?.(payload); + }} + /> +
/** Outcome of comparing a new value against an old one. */
export type ComparisonVerdict = "good" | "neutral" | "regression";
/** Supported policy kinds; only ratio-based thresholds exist. */
export type ComparisonPolicyType = "ratio";
export const DEFAULT_TYPE = "ratio";
export const DEFAULT_BAD_RATIO = 0.9;
export const DEFAULT_GOOD_RATIO = 1.1;
export const DEFAULT_DIRECTION = "up";

/**
 * Builds the fallback policy: ratio-based, higher-is-better, using
 * DEFAULT_BAD_RATIO / DEFAULT_GOOD_RATIO as thresholds.
 *
 * @param target metric/column name to stamp on the policy
 */
export function getDefaultComparisonPolicy(
  target: string
): BenchmarkComparisonPolicyConfig {
  return {
    target,
    type: DEFAULT_TYPE,
    ratioPolicy: {
      badRatio: DEFAULT_BAD_RATIO,
      goodRatio: DEFAULT_GOOD_RATIO,
      direction: DEFAULT_DIRECTION,
    },
  };
}

export type BenchmarkComparisonPolicyConfig = {
  /** metric/column name this policy applies to */
  target: string;

  /**
   * Policy kind. NOTE: evaluateComparison treats a policy WITHOUT a type as
   * "no policy" and evaluates the default policy instead.
   */
  type?: ComparisonPolicyType;

  /** ratio-based thresholds relative to oldValue */
  ratioPolicy?: {
    /**
     * Optional threshold for "good" (clear improvement). When omitted, no
     * value is ever reported as "good".
     *
     * Interpretation depends on `direction` (comparisons are strict):
     * - direction = "up" (higher is better):
     *     newValue > oldValue * goodRatio → verdict = "good"
     *
     * - direction = "down" (lower is better):
     *     newValue < oldValue * goodRatio → verdict = "good"
     *
     * Example:
     *   direction = "up", goodRatio = 1.05
     *   → new must be more than 105% of old to be considered good.
     *
     *   direction = "down", goodRatio = 0.95
     *   → new must be below 95% of old (>5% faster) to be considered good.
     */
    goodRatio?: number;

    /**
     * Mandatory threshold for "regression".
     *
     * Interpretation depends on `direction` (comparisons are strict):
     * - direction = "up" (higher is better):
     *     newValue < oldValue * badRatio → verdict = "regression"
     *
     * - direction = "down" (lower is better):
     *     newValue > oldValue * badRatio → verdict = "regression"
     *
     * Example:
     *   direction = "up", badRatio = 0.98
     *   → new below 98% of old (>2% drop) is a regression.
     *
     *   direction = "down", badRatio = 1.10
     *   → new above 110% of old (>10% slower) is a regression.
     */
    badRatio: number;

    /**
     * Direction of improvement:
     * - "up"   → higher newValue is better (typical for accuracy, pass rate, throughput).
     * - "down" → lower newValue is better (typical for latency, memory, runtime).
     *
     * Default: "up".
     */
    direction?: "up" | "down";
  };
};

export type ComparisonResult = {
  target: string;
  oldValue: number | null;
  newValue: number | null;
  /** newValue - oldValue, or null when either side is missing */
  delta: number | null;
  verdict: ComparisonVerdict;
  /** human-readable explanation of the verdict */
  reason?: string;
  /** true when the default policy was evaluated instead of a caller policy */
  isDefaultPolicy: boolean;
};

// ------------------------------------------------------------------
// Evaluator
// ------------------------------------------------------------------

/**
 * Applies ratio thresholds (oldValue * ratio) to newValue. The "good" check
 * runs before the "regression" check, so "good" wins if both would match.
 *
 * Thresholds scale oldValue, so for a negative oldValue they move in the
 * opposite direction (e.g. old * 1.1 is below old).
 */
function evaluateRatioPolicy(
  base: ComparisonResult,
  oldValue: number,
  newValue: number,
  ratioPolicy: NonNullable<BenchmarkComparisonPolicyConfig["ratioPolicy"]>
): ComparisonResult {
  // Anything other than "up" is treated as "down".
  const higherIsBetter = (ratioPolicy.direction ?? DEFAULT_DIRECTION) === "up";
  const { goodRatio, badRatio } = ratioPolicy;

  if (goodRatio != null) {
    // Report the threshold that is actually compared against, including when
    // goodRatio is 0.
    const goodThreshold = oldValue * goodRatio;
    const isGood = higherIsBetter
      ? newValue > goodThreshold
      : newValue < goodThreshold;
    if (isGood) {
      const op = higherIsBetter ? ">" : "<";
      return {
        ...base,
        verdict: "good",
        reason: `new ${op} old * goodRatio (${goodRatio})[${goodThreshold.toFixed(
          2
        )}]`,
      };
    }
  }

  const badThreshold = oldValue * badRatio;
  const isRegression = higherIsBetter
    ? newValue < badThreshold
    : newValue > badThreshold;
  if (isRegression) {
    const op = higherIsBetter ? "<" : ">";
    return {
      ...base,
      verdict: "regression",
      reason: `new ${op} old * badRatio (${badRatio})[${badThreshold.toFixed(
        2
      )}]`,
    };
  }

  return { ...base, verdict: "neutral", reason: "between good/bad ratios" };
}

/**
 * Compares `newValue` against `oldValue` under `policy` and returns a verdict.
 *
 * - When `policy` is missing or has no `type`, the default policy
 *   (`getDefaultComparisonPolicy("general")`) is evaluated and the result is
 *   flagged with `isDefaultPolicy: true`.
 * - When either value is null/undefined/NaN the verdict is "neutral" with
 *   reason "missing value".
 *
 * @param target metric name echoed in the result ("general" when absent)
 * @param oldValue baseline value
 * @param newValue value being judged
 * @param policy optional comparison policy
 * @returns the verdict together with the inputs, delta and a reason string
 */
export function evaluateComparison(
  target: string | undefined | null,
  oldValue: number | null | undefined,
  newValue: number | null | undefined,
  policy?: BenchmarkComparisonPolicyConfig
): ComparisonResult {
  // Decide this BEFORE substituting the default; afterwards the policy always
  // has a type and the flag could never be true.
  const usesDefaultPolicy = policy == null || policy.type == null;
  const effectivePolicy: BenchmarkComparisonPolicyConfig =
    policy != null && policy.type != null
      ? policy
      : getDefaultComparisonPolicy("general");
  const type: ComparisonPolicyType = effectivePolicy.type ?? DEFAULT_TYPE;

  const base: ComparisonResult = {
    target: target ?? "general",
    oldValue: oldValue ?? null,
    newValue: newValue ?? null,
    delta: oldValue != null && newValue != null ? newValue - oldValue : null,
    verdict: "neutral",
    isDefaultPolicy: usesDefaultPolicy,
  };

  // missing values → neutral
  if (
    oldValue == null ||
    newValue == null ||
    Number.isNaN(oldValue) ||
    Number.isNaN(newValue)
  ) {
    return { ...base, reason: "missing value" };
  }

  switch (type) {
    case "ratio":
      return evaluateRatioPolicy(
        base,
        oldValue,
        newValue,
        effectivePolicy.ratioPolicy ?? {
          badRatio: DEFAULT_BAD_RATIO,
          goodRatio: DEFAULT_GOOD_RATIO,
          direction: DEFAULT_DIRECTION,
        }
      );
    default:
      // Only reachable when an untyped config carries an unknown policy type.
      return { ...base, verdict: "neutral", reason: "no policy" };
  }
}
/**
 * Comparison policies used by the compiler comparison table, one per metric.
 * Each flags a change of more than 5% (10% for execution time) relative to
 * the left-hand value.
 */
const PASSRATE_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = {
  target: "passrate",
  type: "ratio",
  ratioPolicy: {
    badRatio: 0.95,
    goodRatio: 1.05,
    direction: "up",
  },
};
const GEOMEAN_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = {
  target: "geomean",
  type: "ratio",
  ratioPolicy: {
    badRatio: 0.95,
    goodRatio: 1.05,
    direction: "up",
  },
};
const EXECUTION_TIME_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = {
  target: "execution_time",
  type: "ratio",
  ratioPolicy: {
    badRatio: 1.1,
    goodRatio: 0.9,
    direction: "down",
  },
};
// Latency is a lower-is-better metric: a value that grew by more than 5% is a
// regression and one that shrank by more than 5% is an improvement. With
// direction "up" the colouring was inverted (slower compiles shown as good).
const COMPILATION_LATENCY_POLICY: BenchmarkComparisonPolicyConfig = {
  target: "compilation_latency",
  type: "ratio",
  ratioPolicy: {
    badRatio: 1.05,
    goodRatio: 0.95,
    direction: "down",
  },
};
b/torchci/components/benchmark/v3/configs/utils/fanoutRegistration.tsx @@ -1,4 +1,7 @@ -import FanoutBenchmarkTimeSeriesChartSection from "../../components/dataRender/fanout/FanoutBenchmarkTimeSeriesChartSection"; +import { + FanoutBenchmarkTimeSeriesChartSection, + FanoutBenchmarkTimeSeriesComparisonTableSection, +} from "../../components/dataRender/fanout/FanoutComponents"; /** ---------------- Types ---------------- */ export type FanoutComponentProps = { @@ -46,8 +49,11 @@ export class FanoutRegistry { Component: FanoutBenchmarkTimeSeriesChartSection, data_path: "time_series", }, + FanoutBenchmarkTimeSeriesComparisonTableSection: { + Component: FanoutBenchmarkTimeSeriesComparisonTableSection, + data_path: "table", + }, }; - this.map = Object.freeze({ ...registry }); this.fallback = Object.freeze({ Component: ErrorFanoutComponent }); Object.freeze(this); // freeze the instance so it can't be mutated diff --git a/torchci/lib/benchmark/api_helper/compilers/helpers/general.ts b/torchci/lib/benchmark/api_helper/compilers/helpers/general.ts index d0ed39d2d9..ad0a49b6a5 100644 --- a/torchci/lib/benchmark/api_helper/compilers/helpers/general.ts +++ b/torchci/lib/benchmark/api_helper/compilers/helpers/general.ts @@ -1,5 +1,5 @@ import { - groupByBenchmarkData, + to_table, to_time_series_data, toTimeSeriesResponse, } from "../../utils"; @@ -26,6 +26,7 @@ const COMPILER_GENERAL_TABLE_GROUP_KEY = [ "branch", "compiler", "model", + "suite", ]; const COMPILER_GENERAL_TABLE_SUB_GROUP_KEY = ["metric"]; @@ -62,7 +63,7 @@ function getformat(data: any, format: string) { COMPILER_GENERAL_TS_SUB_GROUP_KEY ); case "table": - return groupByBenchmarkData( + return to_table( data, COMPILER_GENERAL_TABLE_GROUP_KEY, COMPILER_GENERAL_TABLE_SUB_GROUP_KEY diff --git a/torchci/lib/benchmark/api_helper/compilers/helpers/precompute.ts b/torchci/lib/benchmark/api_helper/compilers/helpers/precompute.ts index be3ec4c6bf..1167a68967 100644 --- 
/**
 * Groups raw benchmark rows and reshapes every group into a table entry.
 *
 * Grouping itself is delegated to `groupByBenchmarkData(data, keys, sub_keys)`;
 * this function only repackages each resulting group.
 *
 * @param data raw rows to group
 * @param keys field names forwarded to `groupByBenchmarkData` as group keys
 * @param sub_keys field names forwarded as sub-group keys; also echoed back
 *   unchanged on every entry
 * @returns one entry per group containing:
 *   - `group_info`: the group's `group_info` as produced by the grouping helper
 *   - `group_keys`: the property names of `group_info`
 *   - `sub_keys`: the `sub_keys` argument
 *   - `num_of_dp`: the number of values held in the group's `rows`
 *   - `data`: the group's `rows`, passed through as-is
 */
export function to_table(data: any[], keys: string[], sub_keys: string[]) {
  return groupByBenchmarkData(data, keys, sub_keys).map(
    ({ group_info, rows }) => ({
      group_info,
      group_keys: Object.keys(group_info),
      sub_keys,
      num_of_dp: Object.values(rows).length,
      data: rows,
    })
  );
}