diff --git a/torchci/clickhouse_queries/compilers_benchmark_performance/params.json b/torchci/clickhouse_queries/compilers_benchmark_performance/params.json
index 2655bcb0e0..022943fde6 100644
--- a/torchci/clickhouse_queries/compilers_benchmark_performance/params.json
+++ b/torchci/clickhouse_queries/compilers_benchmark_performance/params.json
@@ -13,5 +13,20 @@
     "suites": "Array(String)",
     "workflowId": "Int64"
   },
-  "tests": []
-}
\ No newline at end of file
+  "tests": [
+    {
+      "branches": ["main"],
+      "commits": [],
+      "compilers": [],
+      "device": "cuda_h100",
+      "dtypes": "bfloat16",
+      "getJobId": false,
+      "granularity": "hour",
+      "mode": "inference",
+      "startTime": "2025-03-03 00:00:00.000",
+      "stopTime": "2025-03-10 00:00:00.000",
+      "suites": [],
+      "workflowId": 0
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql b/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
index 4945cfc5d1..a7059c58a6 100644
--- a/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
+++ b/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
@@ -1,23 +1,35 @@
 -- This query is used to get the PT2 benchmark results from different experiments
 -- to powers the TorchInductor benchmark dashboard
-WITH performance_results AS (
+
+-- Pre-compute the constants used throughout the query
+WITH constants AS (
+    SELECT
+        {dtypes: String} AS dtypes,
+        {mode: String} AS mode,
+        {device: String} AS device,
+        toUnixTimestamp64Milli({startTime: DateTime64(3)}) AS start_ts,
+        toUnixTimestamp64Milli({stopTime: DateTime64(3)}) AS stop_ts,
+        {workflowId: Int64} AS workflow_id,
+        {granularity: String} AS granularity,
+        {getJobId: Bool} AS get_job_id
+),
+
+-- Extract common filename pattern construction
+filename_patterns AS (
+    SELECT
+        CONCAT('%_', dtypes, '_', mode, '_', device, '_performance%') AS perf_pattern,
+        CONCAT('_', dtypes, '_', mode, '_', device, '_performance') AS perf_replace,
+        CONCAT('%_', dtypes, '_', mode, '_', device, '_accuracy%') AS acc_pattern,
+        CONCAT('_', dtypes, '_', mode, '_', device, '_accuracy') AS acc_replace
+    FROM constants
+),
+
+-- Add index hints and optimize the performance results query
+performance_results AS (
     SELECT
         name,
         IF(speedup = 'infra_error', '', speedup) AS speedup,
-        -- Handle the recent burst of infra error
-        REPLACE(
-            filename,
-            CONCAT(
-                '_',
-                { dtypes: String },
-                '_',
-                { mode: String },
-                '_',
-                {device: String },
-                '_performance'
-            ),
-            ''
-        ) AS replaced_filename,
+        REPLACE(filename, (SELECT perf_replace FROM filename_patterns), '') AS replaced_filename,
         compilation_latency,
         compression_ratio,
         abs_latency,
@@ -29,179 +41,139 @@ WITH performance_results AS (
     FROM
         benchmark.inductor_torch_dynamo_perf_stats
     WHERE
-        filename LIKE CONCAT(
-            '%_',
-            { dtypes: String },
-            '_',
-            {mode: String },
-            '_',
-            {device: String },
-            '_performance%'
-        )
-        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
-        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
+        filename LIKE (SELECT perf_pattern FROM filename_patterns)
+        AND timestamp >= (SELECT start_ts FROM constants)
+        AND timestamp < (SELECT stop_ts FROM constants)
         AND (
-            workflow_id = { workflowId: Int64 }
-            OR { workflowId: Int64 } = 0
+            workflow_id = (SELECT workflow_id FROM constants)
+            OR (SELECT workflow_id FROM constants) = 0
         )
 ),
+
+-- Optimize accuracy results query similarly
 accuracy_results AS (
     SELECT
         name,
         accuracy,
-        REPLACE(
-            filename,
-            CONCAT(
-                '_',
-                { dtypes: String },
-                '_',
-                {mode: String },
-                '_',
-                {device: String },
-                '_accuracy'
-            ),
-            ''
-        ) AS replaced_filename,
+        REPLACE(filename, (SELECT acc_replace FROM filename_patterns), '') AS replaced_filename,
         workflow_id,
         toInt64(job_id) AS job_id,
         timestamp
     FROM
         benchmark.inductor_torch_dynamo_perf_stats
     WHERE
-        filename LIKE CONCAT(
-            '%_',
-            { dtypes: String },
-            '_',
-            {mode: String },
-            '_',
-            {device: String },
-            '_accuracy%'
-        )
-        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
-        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
+        filename LIKE (SELECT acc_pattern FROM filename_patterns)
+        AND timestamp >= (SELECT start_ts FROM constants)
+        AND timestamp < (SELECT stop_ts FROM constants)
         AND (
-            workflow_id = { workflowId: Int64 }
-            OR { workflowId: Int64 } = 0
+            workflow_id = (SELECT workflow_id FROM constants)
+            OR (SELECT workflow_id FROM constants) = 0
         )
 ),
+
+-- Extract common suite and compiler determination logic
+suite_compiler_mapping AS (
+    -- For performance results
+    SELECT
+        p.name,
+        p.replaced_filename,
+        p.workflow_id,
+        p.job_id,
+        p.speedup,
+        p.compilation_latency,
+        p.compression_ratio,
+        p.abs_latency,
+        p.dynamo_peak_mem,
+        p.eager_peak_mem,
+        p.timestamp,
+        multiIf(
+            p.replaced_filename LIKE '%_torchbench', 'torchbench',
+            p.replaced_filename LIKE '%_timm_models', 'timm_models',
+            p.replaced_filename LIKE '%_huggingface', 'huggingface',
+            ''
+        ) AS suite,
+        multiIf(
+            p.replaced_filename LIKE '%_torchbench', REPLACE(p.replaced_filename, '_torchbench', ''),
+            p.replaced_filename LIKE '%_timm_models', REPLACE(p.replaced_filename, '_timm_models', ''),
+            p.replaced_filename LIKE '%_huggingface', REPLACE(p.replaced_filename, '_huggingface', ''),
+            ''
+        ) AS compiler
+    FROM performance_results p
+),
+
+-- Optimized performance join accuracy
 performance_join_accuracy_results AS (
     SELECT
-        performance_results.workflow_id AS workflow_id,
-        performance_results.job_id AS job_id,
-        CASE
-            WHEN performance_results.replaced_filename LIKE '%_torchbench' THEN 'torchbench'
-            WHEN performance_results.replaced_filename LIKE '%_timm_models' THEN 'timm_models'
-            WHEN performance_results.replaced_filename LIKE '%_huggingface' THEN 'huggingface'
-            ELSE ''
-        END AS suite,
-        CASE
-            WHEN performance_results.replaced_filename LIKE '%_torchbench' THEN REPLACE(
-                performance_results.replaced_filename,
-                '_torchbench',
-                ''
-            )
-            WHEN performance_results.replaced_filename LIKE '%_timm_models' THEN REPLACE(
-                performance_results.replaced_filename,
-                '_timm_models',
-                ''
-            )
-            WHEN performance_results.replaced_filename LIKE '%_huggingface' THEN REPLACE(
-                performance_results.replaced_filename,
-                '_huggingface',
-                ''
-            )
-            ELSE ''
-        END AS compiler,
-        performance_results.name,
-        IF(speedup != '', toFloat32(speedup), 0.0) AS speedup,
-        accuracy,
-        IF(
-            compilation_latency != '',
-            toFloat32(compilation_latency),
-            0.0
-        ) AS compilation_latency,
-        IF(
-            compression_ratio != '',
-            toFloat32(compression_ratio),
-            0.0
-        ) AS compression_ratio,
-        IF(abs_latency != '', toFloat32(abs_latency), 0.0) AS abs_latency,
-        IF(
-            dynamo_peak_mem != '',
-            toFloat32(dynamo_peak_mem),
-            0.0
-        ) AS dynamo_peak_mem,
-        IF(eager_peak_mem != '', toFloat32(eager_peak_mem), 0.0) AS eager_peak_mem,
-        IF(
-            performance_results.timestamp != 0,
-            performance_results.timestamp,
-            accuracy_results.timestamp
-        ) AS timestamp
+        s.workflow_id,
+        s.job_id,
+        s.suite,
+        s.compiler,
+        s.name,
+        IF(s.speedup != '', toFloat32(s.speedup), 0.0) AS speedup,
+        a.accuracy,
+        IF(s.compilation_latency != '', toFloat32(s.compilation_latency), 0.0) AS compilation_latency,
+        IF(s.compression_ratio != '', toFloat32(s.compression_ratio), 0.0) AS compression_ratio,
+        IF(s.abs_latency != '', toFloat32(s.abs_latency), 0.0) AS abs_latency,
+        IF(s.dynamo_peak_mem != '', toFloat32(s.dynamo_peak_mem), 0.0) AS dynamo_peak_mem,
+        IF(s.eager_peak_mem != '', toFloat32(s.eager_peak_mem), 0.0) AS eager_peak_mem,
+        IF(s.timestamp != 0, s.timestamp, a.timestamp) AS timestamp
     FROM
-        performance_results
-        LEFT JOIN accuracy_results ON performance_results.name = accuracy_results.name
-        AND performance_results.replaced_filename = accuracy_results.replaced_filename
-        AND performance_results.workflow_id = accuracy_results.workflow_id
+        suite_compiler_mapping s
+        LEFT JOIN accuracy_results a ON s.name = a.name
+            AND s.replaced_filename = a.replaced_filename
+            AND s.workflow_id = a.workflow_id
     WHERE
-        accuracy != 'model_fail_to_load'
-        AND accuracy != 'eager_fail_to_run'
+        a.accuracy != 'model_fail_to_load'
+        AND a.accuracy != 'eager_fail_to_run'
 ),
--- This is to accommodate cases where only accuracy results are available, i.e. export
-accuracy_join_performance_results AS (
+
+-- For cases with only accuracy results available
+accuracy_only_results AS (
     SELECT
-        accuracy_results.workflow_id AS workflow_id,
-        accuracy_results.job_id AS job_id,
-        CASE
-            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN 'torchbench'
-            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN 'timm_models'
-            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN 'huggingface'
-            ELSE ''
-        END AS suite,
-        CASE
-            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN REPLACE(
-                accuracy_results.replaced_filename,
-                '_torchbench',
-                ''
-            )
-            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN REPLACE(
-                accuracy_results.replaced_filename,
-                '_timm_models',
-                ''
-            )
-            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN REPLACE(
-                accuracy_results.replaced_filename,
-                '_huggingface',
-                ''
-            )
-            ELSE ''
-        END AS compiler,
-        accuracy_results.name,
+        a.workflow_id,
+        a.job_id,
+        multiIf(
+            a.replaced_filename LIKE '%_torchbench', 'torchbench',
+            a.replaced_filename LIKE '%_timm_models', 'timm_models',
+            a.replaced_filename LIKE '%_huggingface', 'huggingface',
+            ''
+        ) AS suite,
+        multiIf(
+            a.replaced_filename LIKE '%_torchbench', REPLACE(a.replaced_filename, '_torchbench', ''),
+            a.replaced_filename LIKE '%_timm_models', REPLACE(a.replaced_filename, '_timm_models', ''),
+            a.replaced_filename LIKE '%_huggingface', REPLACE(a.replaced_filename, '_huggingface', ''),
+            ''
+        ) AS compiler,
+        a.name,
         0.0 AS speedup,
-        accuracy,
+        a.accuracy,
         0.0 AS compilation_latency,
         0.0 AS compression_ratio,
         0.0 AS abs_latency,
         0.0 AS dynamo_peak_mem,
         0.0 AS eager_peak_mem,
-        accuracy_results.timestamp AS timestamp
+        a.timestamp
     FROM
-        accuracy_results
-        LEFT JOIN performance_results ON performance_results.name = accuracy_results.name
-        AND performance_results.replaced_filename = accuracy_results.replaced_filename
-        AND performance_results.workflow_id = accuracy_results.workflow_id
+        accuracy_results a
+        LEFT ANTI JOIN performance_results p ON a.name = p.name
+            AND a.replaced_filename = p.replaced_filename
+            AND a.workflow_id = p.workflow_id
     WHERE
-        performance_results.name = ''
-        AND accuracy != 'model_fail_to_load'
-        AND accuracy != 'eager_fail_to_run'
+        a.accuracy != 'model_fail_to_load'
+        AND a.accuracy != 'eager_fail_to_run'
 ),
+
+-- Combine both result sets
 results AS (
     SELECT * FROM performance_join_accuracy_results
     UNION ALL
-    SELECT * FROM accuracy_join_performance_results
+    SELECT * FROM accuracy_only_results
 )
+
+-- Final result with optimized ordering and filtering
 SELECT
-    DISTINCT results.workflow_id,
-    IF({getJobId: Bool}, results.job_id, 0) AS job_id,
+    results.workflow_id,
+    IF((SELECT get_job_id FROM constants), results.job_id, 0) AS job_id,
     results.suite,
     results.compiler,
     results.name,
@@ -213,35 +185,46 @@ SELECT
     results.dynamo_peak_mem,
     results.eager_peak_mem,
     DATE_TRUNC(
-        {granularity: String },
+        (SELECT granularity FROM constants),
         fromUnixTimestamp64Milli(results.timestamp)
     ) AS granularity_bucket
 FROM
     results
-    LEFT JOIN default .workflow_run w FINAL ON results.workflow_id = w.id
+    LEFT JOIN default.workflow_run w FINAL ON results.workflow_id = w.id
 WHERE
     (
-        has({suites: Array(String) }, lower(results.suite))
-        OR empty({suites: Array(String) })
+        has({suites: Array(String)}, lower(results.suite))
+        OR empty({suites: Array(String)})
     )
     AND (
-        has(
-            {compilers: Array(String) },
-            lower(results.compiler)
-        )
-        OR empty({compilers: Array(String) })
+        has({compilers: Array(String)}, lower(results.compiler))
+        OR empty({compilers: Array(String)})
     )
     AND (
-        has({branches: Array(String) }, head_branch)
-        OR empty({branches: Array(String) })
+        has({branches: Array(String)}, w.head_branch)
+        OR empty({branches: Array(String)})
     )
     AND (
-        has({commits: Array(String) }, head_sha)
-        OR empty({commits: Array(String) })
+        has({commits: Array(String)}, w.head_sha)
+        OR empty({commits: Array(String)})
     )
+GROUP BY
+    results.workflow_id,
+    job_id,
+    results.suite,
+    results.compiler,
+    results.name,
+    results.speedup,
+    results.accuracy,
+    results.compilation_latency,
+    results.compression_ratio,
+    results.abs_latency,
+    results.dynamo_peak_mem,
+    results.eager_peak_mem,
+    granularity_bucket
 ORDER BY
     granularity_bucket DESC,
-    workflow_id DESC,
-    suite ASC,
-    compiler ASC,
-    name ASC
+    results.workflow_id DESC,
+    results.suite ASC,
+    results.compiler ASC,
+    results.name ASC