From 0313434d86e4e810f7e0dffe2199db2d72257802 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 5 Sep 2025 21:01:21 -0700 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- .../compilers_benchmark_api_query/params.json | 17 ++++ .../compilers_benchmark_api_query/query.sql | 37 +++++++ .../api_helper/compilers/precompute.ts | 98 +++++++++++++------ torchci/lib/benchmark/api_helper/utils.ts | 2 + torchci/lib/benchmark/compilerUtils.ts | 2 + torchci/lib/types.ts | 4 + 6 files changed, 131 insertions(+), 29 deletions(-) create mode 100644 torchci/clickhouse_queries/compilers_benchmark_api_query/params.json create mode 100644 torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/params.json b/torchci/clickhouse_queries/compilers_benchmark_api_query/params.json new file mode 100644 index 0000000000..95f00e1501 --- /dev/null +++ b/torchci/clickhouse_queries/compilers_benchmark_api_query/params.json @@ -0,0 +1,17 @@ +{ + "params": { + "branches": "Array(String)", + "commits": "Array(String)", + "compilers": "Array(String)", + "device": "String", + "arch": "String", + "dtype": "String", + "granularity": "String", + "mode": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)", + "suites": "Array(String)", + "workflowId": "Int64" + }, + "tests": [] +} diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql new file mode 100644 index 0000000000..3a3473d88c --- /dev/null +++ b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql @@ -0,0 +1,37 @@ + +SELECT + workflow_id, + job_id, + head_sha, + replaceOne(head_branch, 'refs/heads/', '') AS head_branch, + suite, + model_name AS model, + metric_name AS metric, + value, + metric_extra_info AS extra_info, + benchmark_extra_info['output'] AS output, + timestamp, + DATE_TRUNC({granularity: String}, fromUnixTimestamp(timestamp)) 
AS granularity_bucket +FROM benchmark.oss_ci_benchmark_torchinductor +PREWHERE + timestamp >= toUnixTimestamp({startTime:DateTime64(3)}) AND + timestamp < toUnixTimestamp({stopTime:DateTime64(3)}) +WHERE + (head_sha) IN ( + SELECT DISTINCT + head_sha + FROM benchmark.oss_ci_benchmark_torchinductor + PREWHERE + timestamp >= toUnixTimestamp({startTime: DateTime64(3,)}) + AND timestamp < toUnixTimestamp({stopTime: DateTime64(3)}) + ) + AND ( + has({branches: Array(String)}, replaceOne(head_branch, 'refs/heads/', '')) + OR empty({branches: Array(String)}) + ) + AND benchmark_dtype = {dtype: String} + AND benchmark_mode = {mode: String} + AND device = {device: String} + AND positionCaseInsensitive(arch,{arch: String}) > 0 + +SETTINGS session_timezone = 'UTC'; diff --git a/torchci/lib/benchmark/api_helper/compilers/precompute.ts b/torchci/lib/benchmark/api_helper/compilers/precompute.ts index 0a11ab44b3..72e018724b 100644 --- a/torchci/lib/benchmark/api_helper/compilers/precompute.ts +++ b/torchci/lib/benchmark/api_helper/compilers/precompute.ts @@ -6,34 +6,29 @@ import { getPassingModels, } from "lib/benchmark/compilerUtils"; import { queryClickhouseSaved } from "lib/clickhouse"; -import { - BenchmarkTimeSeriesResponse, - CommitRow, - groupByBenchmarkData, - toCommitRowMap, -} from "../utils"; - -const BENCNMARK_TABLE_NAME = "compilers_benchmark_performance"; -const BENCNMARK_COMMIT_NAME = "compilers_benchmark_performance_branches"; +import { CompilerPerformanceData } from "lib/types"; +import { BenchmarkTimeSeriesResponse, groupByBenchmarkData } from "../utils"; +//["x86_64","NVIDIA A10G","NVIDIA H100 80GB HBM3"] +const COMPILER_BENCHMARK_TABLE_NAME = "compilers_benchmark_api_query"; // TODO(elainewy): improve the fetch performance -export async function getCompilerBenchmarkData(inputparams: any) { +export async function getCompilerBenchmarkData( + inputparams: any, + query_table: string = "" +) { + let table = COMPILER_BENCHMARK_TABLE_NAME; + if (query_table.length > 
0) { + table = query_table; + } + const start = Date.now(); - const rows = await queryClickhouseSaved(BENCNMARK_TABLE_NAME, inputparams); + let rows = await queryClickhouseSaved(table, inputparams); const end = Date.now(); - console.log("time to get data", end - start); - - const startc = Date.now(); - const commits = await queryClickhouseSaved( - BENCNMARK_COMMIT_NAME, - inputparams - ); - const endc = Date.now(); - console.log("time to get commit data", endc - startc); - const commitMap = toCommitRowMap(commits); + console.log("time to get compiler timeseries data", end - start); if (rows.length === 0) { const response: BenchmarkTimeSeriesResponse = { + total_rows: 0, time_series: [], time_range: { start: "", @@ -43,11 +38,26 @@ export async function getCompilerBenchmarkData(inputparams: any) { return response; } + // extract backend from output at runtime instead of doing it in the query, since it's expensive for regex matching. + // TODO(elainewy): we should add this as a column in the database for less runtime logic. + rows.map((row) => { + const backend = + row.backend && row.backend !== "" + ? 
row.backend + : extractBackendSqlStyle( + row.output, + row.suite, + inputparams.dtype, + inputparams.mode, + inputparams.device + ); + row["backend"] = backend; + }); + // TODO(elainewy): add logics to handle the case to return raw data const benchmark_time_series_response = toPrecomputeCompiler( rows, inputparams, - commitMap, "time_series" ); return benchmark_time_series_response; @@ -56,18 +66,16 @@ export async function getCompilerBenchmarkData(inputparams: any) { function toPrecomputeCompiler( rawData: any[], inputparams: any, - commitMap: Record, type: string = "time_series" ) { const data = convertToCompilerPerformanceData(rawData); + const commit_map = toWorkflowIdMap(data); const models = getPassingModels(data); - const passrate = computePassrate(data, models); const geomean = computeGeomean(data, models); const peakMemory = computeMemoryCompressionRatio(data, models); const all_data = [passrate, geomean, peakMemory].flat(); - const earliest_timestamp = Math.min( ...all_data.map((row) => new Date(row.granularity_bucket).getTime()) ); @@ -81,9 +89,8 @@ function toPrecomputeCompiler( row["arch"] = inputparams["arch"]; row["device"] = inputparams["device"]; row["mode"] = inputparams["mode"]; - // always keep this: - row["commit"] = commitMap[row["workflow_id"]]?.head_sha; - row["branch"] = commitMap[row["workflow_id"]]?.head_branch; + row["commit"] = commit_map.get(row.workflow_id)?.commit; + row["branch"] = commit_map.get(row.workflow_id)?.branch; }); let res: any[] = []; @@ -163,11 +170,44 @@ function toPrecomputeCompiler( } const response: BenchmarkTimeSeriesResponse = { - time_series: res, + total_rows: res.length, + total_raw_rows: rawData.length, time_range: { start: new Date(earliest_timestamp).toISOString(), end: new Date(latest_timestamp).toISOString(), }, + time_series: res, }; return response; } + +export function extractBackendSqlStyle( + output: string, + suite: string, + dtype: string, + mode: string, + device: string +): string | null { + 
const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const tail = `_${esc(suite)}_${esc(dtype)}_${esc(mode)}_${esc(device)}_`; + + const temp = output.replace(new RegExp(`${tail}.*$`), ""); + + const m = temp.match(/.*[\/\\]([^\/\\]+)$/); + return m ? m[1] : null; +} + +export function toWorkflowIdMap(data: CompilerPerformanceData[]) { + const commit_map = new Map(); + data.forEach((row) => { + const commit = row?.commit; + const branch = row?.branch; + const workflow_id = `${row.workflow_id}`; + commit_map.set(workflow_id, { + commit, + branch, + workflow_id, + }); + }); + return commit_map; +} diff --git a/torchci/lib/benchmark/api_helper/utils.ts b/torchci/lib/benchmark/api_helper/utils.ts index 47a90e003d..47a4cf92f0 100644 --- a/torchci/lib/benchmark/api_helper/utils.ts +++ b/torchci/lib/benchmark/api_helper/utils.ts @@ -136,8 +136,10 @@ export function getNestedField(obj: any, path: string): any { } export type BenchmarkTimeSeriesResponse = { + total_rows: number; time_series: any[]; time_range: { start: string; end: string }; + total_raw_rows?: number; }; export type CommitRow = { diff --git a/torchci/lib/benchmark/compilerUtils.ts b/torchci/lib/benchmark/compilerUtils.ts index 06e4542269..2614672383 100644 --- a/torchci/lib/benchmark/compilerUtils.ts +++ b/torchci/lib/benchmark/compilerUtils.ts @@ -440,6 +440,8 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) { suite: r.suite, workflow_id: r.workflow_id, job_id: r.job_id, + branch: r.head_branch, + commit: r.head_sha, }; } diff --git a/torchci/lib/types.ts b/torchci/lib/types.ts index dd0242d170..80a97acba6 100644 --- a/torchci/lib/types.ts +++ b/torchci/lib/types.ts @@ -204,6 +204,8 @@ export interface CompilerPerformanceData { suite: string; workflow_id: number; job_id?: number; + branch?: string; + commit?: string; } export interface TritonBenchPerformanceData { @@ -231,6 +233,8 @@ export interface BenchmarkData { suite: string; value: number; workflow_id: 
number; + head_sha?: string; + head_branch?: string; } export interface RepoBranchAndCommit { From 9dbf66fc02200bec3b3984fc813581a647229c3c Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 5 Sep 2025 22:49:39 -0700 Subject: [PATCH 2/3] Update [ghstack-poisoned] --- .../clickhouse_queries/compilers_benchmark_api_query/query.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql index 3a3473d88c..9152e44c1e 100644 --- a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql +++ b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql @@ -13,9 +13,6 @@ SELECT timestamp, DATE_TRUNC({granularity: String}, fromUnixTimestamp(timestamp)) AS granularity_bucket FROM benchmark.oss_ci_benchmark_torchinductor -PREWHERE - timestamp >= toUnixTimestamp({startTime:DateTime64(3)}) AND - timestamp < toUnixTimestamp({stopTime:DateTime64(3)}) WHERE (head_sha) IN ( SELECT DISTINCT From b4a903832fe010c456b0451a8163b362a7223c63 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 8 Sep 2025 10:54:07 -0700 Subject: [PATCH 3/3] Update [ghstack-poisoned] --- .../compilers_benchmark_api_query/query.sql | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql index 9152e44c1e..e84bba506e 100644 --- a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql +++ b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql @@ -1,34 +1,36 @@ - SELECT - workflow_id, - job_id, - head_sha, - replaceOne(head_branch, 'refs/heads/', '') AS head_branch, - suite, - model_name AS model, - metric_name AS metric, - value, - metric_extra_info AS extra_info, - benchmark_extra_info['output'] AS output, - timestamp, - DATE_TRUNC({granularity: String}, 
fromUnixTimestamp(timestamp)) AS granularity_bucket + workflow_id, + job_id, + head_sha, + replaceOne(head_branch, 'refs/heads/', '') AS head_branch, + suite, + model_name AS model, + metric_name AS metric, + value, + metric_extra_info AS extra_info, + benchmark_extra_info['output'] AS output, + timestamp, + DATE_TRUNC({granularity: String}, fromUnixTimestamp(timestamp)) + AS granularity_bucket FROM benchmark.oss_ci_benchmark_torchinductor WHERE - (head_sha) IN ( - SELECT DISTINCT - head_sha - FROM benchmark.oss_ci_benchmark_torchinductor - PREWHERE - timestamp >= toUnixTimestamp({startTime: DateTime64(3,)}) - AND timestamp < toUnixTimestamp({stopTime: DateTime64(3)}) - ) - AND ( - has({branches: Array(String)}, replaceOne(head_branch, 'refs/heads/', '')) - OR empty({branches: Array(String)}) - ) - AND benchmark_dtype = {dtype: String} - AND benchmark_mode = {mode: String} - AND device = {device: String} - AND positionCaseInsensitive(arch,{arch: String}) > 0 + (head_sha) IN ( + SELECT DISTINCT head_sha + FROM benchmark.oss_ci_benchmark_torchinductor + PREWHERE + timestamp >= toUnixTimestamp({startTime: DateTime64(3)}) + AND timestamp < toUnixTimestamp({stopTime: DateTime64(3)}) + ) + AND ( + has( + {branches: Array(String)}, + replaceOne(head_branch, 'refs/heads/', '') + ) + OR empty({branches: Array(String)}) + ) + AND benchmark_dtype = {dtype: String} + AND benchmark_mode = {mode: String} + AND device = {device: String} + AND positionCaseInsensitive(arch, {arch: String}) > 0 SETTINGS session_timezone = 'UTC';