From 91ce6889d2715e37c95d6323b984fe38642e6c54 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 30 Oct 2024 13:11:59 -0400 Subject: [PATCH 01/24] a universal approach to crossing out failed tests --- tests/broken_tests.json | 16 ++++++++++++++++ tests/ci/report.py | 27 ++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/broken_tests.json diff --git a/tests/broken_tests.json b/tests/broken_tests.json new file mode 100644 index 000000000000..28ffc977d28b --- /dev/null +++ b/tests/broken_tests.json @@ -0,0 +1,16 @@ +{ + "test_replicated_merge_tree_replicated_db_ttl/test.py::test_replicated_db_and_ttl": { + "message": "DB::Exception: Replicated is an experimental database engine.", + "reason": "Will not work without allow_experimental_database_replicated=1" + }, + "test_storage_s3_queue/test.py::test_upgrade": { + "message": "DB::Exception: S3Queue is experimental.", + "reason": "Will not work without allow_experimental_s3queue=1" + }, + "02920_alter_column_of_projections": { + "reason": "requires different settings" + }, + "02888_system_tables_with_inaccsessible_table_function": { + "reason": "todo investigate" + } +} \ No newline at end of file diff --git a/tests/ci/report.py b/tests/ci/report.py index a3c9b53637a9..4bc92014c27f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH +from env_helper import REPORT_PATH, TEMP_PATH, ROOT_DIR logger = logging.getLogger(__name__) @@ -325,6 +325,19 @@ def path_converter(obj): def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults + + broken_tests_config_path = f"{ROOT_DIR}/tests/broken_tests.json" + if ( + os.path.isfile(broken_tests_config_path) + and os.path.getsize(broken_tests_config_path) > 0 + ): + with open(broken_tests_config_path, "r", encoding="utf-8") as broken_tests_file: + broken_tests = json.load( + broken_tests_file + ) # type: Dict[str, Dict[str, str]] + else: + broken_tests = {} + with open(results_path, "r", encoding="utf-8") as descriptor: reader = csv.reader(descriptor, delimiter="\t") for line in reader: @@ -351,6 +364,18 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes else: result.set_log_files(line[3]) + if name in broken_tests.keys() and status == "FAIL": + fail_message = broken_tests[name].get("message", "") + if result.log_files and fail_message: + for log_path in result.log_files: + if log_path.endswith(".log"): + with open(log_path) as log_file: + if fail_message in log_file.read(): + result.status = "XFAIL" + break + else: + result.status = "XFAIL" + results.append(result) return results From 8405250d573a42d9a1d3723abb3ac0429c92b1db Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:49:16 -0400 Subject: [PATCH 02/24] handle known broken tests in process_functional_tests_result.py --- docker/test/util/process_functional_tests_result.py | 10 ++++++++++ tests/ci/functional_test_check.py | 1 + 2 files changed, 11 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index fd4cc9f4bf76..30cba0d6690d 100755 --- a/docker/test/util/process_functional_tests_result.py +++ 
b/docker/test/util/process_functional_tests_result.py @@ -4,6 +4,7 @@ import logging import argparse import csv +import json OK_SIGN = "[ OK " FAIL_SIGN = "[ FAIL " @@ -206,6 +207,7 @@ def write_results(results_file, status_file, results, status): parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") parser.add_argument("--broken-tests", default="/analyzer_tech_debt.txt") + parser.add_argument("--broken-tests-json", default="/broken_tests.json") args = parser.parse_args() broken_tests = list() @@ -213,6 +215,14 @@ def write_results(results_file, status_file, results, status): logging.info(f"File {args.broken_tests} with broken tests found") with open(args.broken_tests) as f: broken_tests = f.read().splitlines() + + if os.path.exists(args.broken_tests_json): + logging.info(f"File {args.broken_tests_json} with broken tests found") + + with open(args.broken_tests_json) as f: + broken_tests.extend(json.load(f).keys()) + + if broken_tests: logging.info(f"Broken tests in the list: {len(broken_tests)}") state, description, test_results = process_result(args.in_results_dir, broken_tests) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 8f1ffb05ac3c..14c07665642e 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -96,6 +96,7 @@ def get_run_command( f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " if "analyzer" not in check_name else "" + f"--volume={repo_path}/tests/broken_tests.json:/broken_tests.json " ) return ( From 08463a4bc2fd513e5ddd1c9cd6d670883755feb5 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:53:04 -0400 Subject: [PATCH 03/24] remove broken test handling from report.py --- tests/ci/report.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 4bc92014c27f..a3c9b53637a9 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH, ROOT_DIR +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -325,19 +325,6 @@ def path_converter(obj): def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults - - broken_tests_config_path = f"{ROOT_DIR}/tests/broken_tests.json" - if ( - os.path.isfile(broken_tests_config_path) - and os.path.getsize(broken_tests_config_path) > 0 - ): - with open(broken_tests_config_path, "r", encoding="utf-8") as broken_tests_file: - broken_tests = json.load( - broken_tests_file - ) # type: Dict[str, Dict[str, str]] - else: - broken_tests = {} - with open(results_path, "r", encoding="utf-8") as descriptor: reader = csv.reader(descriptor, delimiter="\t") for line in reader: @@ -364,18 +351,6 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes else: result.set_log_files(line[3]) - if name in broken_tests.keys() and status == "FAIL": - fail_message = broken_tests[name].get("message", "") - if result.log_files and fail_message: - for log_path in result.log_files: - if log_path.endswith(".log"): - with open(log_path) as log_file: - if fail_message in log_file.read(): - result.status = "XFAIL" - break - 
else: - result.status = "XFAIL" - results.append(result) return results From e0738a541a683b1dff132e0dce5c3ef76e270180 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:09:33 -0400 Subject: [PATCH 04/24] add broken test handling back to integration_tests_runner.py --- tests/ci/integration_tests_runner.py | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f10c0a190e2f..18725a509095 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -474,6 +474,19 @@ def _get_parallel_tests_skip_list(repo_path): skip_list_tests = json.load(skip_list_file) return list(sorted(skip_list_tests)) + @staticmethod + def _get_broken_tests_list(repo_path: str) -> dict: + skip_list_file_path = f"{repo_path}/tests/broken_tests.json" + if ( + not os.path.isfile(skip_list_file_path) + or os.path.getsize(skip_list_file_path) == 0 + ): + return {} + + with open(skip_list_file_path, "r", encoding="utf-8") as skip_list_file: + skip_list_tests = json.load(skip_list_file) + return skip_list_tests + @staticmethod def group_test_by_file(tests): result = {} # type: Dict @@ -891,6 +904,8 @@ def run_impl(self, repo_path, build_path): " ".join(not_found_tests[:3]), ) + known_broken_tests = self._get_broken_tests_list(repo_path) + grouped_tests = self.group_test_by_file(filtered_sequential_tests) i = 0 for par_group in chunks(filtered_parallel_tests, PARALLEL_GROUP_SIZE): @@ -921,6 +936,26 @@ def run_impl(self, repo_path, build_path): group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS ) + + for fail_status in ("ERROR", "FAILED"): + for failed_test in group_counters[fail_status]: + if failed_test in known_broken_tests.keys(): + fail_message = known_broken_tests[failed_test].get("message") + if not fail_message: + mark_as_broken = True + else: + mark_as_broken = False + for log_path in log_paths: + if log_path.endswith(".log"): + with open(log_path) as log_file: + if fail_message in log_file.read(): + mark_as_broken = True + break + + if mark_as_broken: + group_counters[fail_status].remove(failed_test) + group_counters["BROKEN"].append(failed_test) + total_tests = 0 for counter, value in group_counters.items(): logging.info( From d4de139b828e85d34fe649f4a93438a07aeb033d Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:24:06 -0400 Subject: [PATCH 05/24] update broken_tests.json --- tests/broken_tests.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 28ffc977d28b..2af5168381b0 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -1,11 +1,11 @@ { - "test_replicated_merge_tree_replicated_db_ttl/test.py::test_replicated_db_and_ttl": { - "message": "DB::Exception: Replicated is an experimental database engine.", - "reason": "Will not work without allow_experimental_database_replicated=1" + "test_postgresql_replica_database_engine_2/test.py::test_quoting_publication": { + "message": "DB::Exception: Syntax error:", + "reason": "syntax error" }, - "test_storage_s3_queue/test.py::test_upgrade": { - "message": "DB::Exception: S3Queue is experimental.", - "reason": "Will not work without allow_experimental_s3queue=1" + 
"test_distributed_inter_server_secret/test.py::test_secure_cluster_distributed_over_distributed_different_users": { + "message": "DB::NetException: Connection reset by peer, while reading from socket", + "reason": "network issue" }, "02920_alter_column_of_projections": { "reason": "requires different settings" From 293ea8b110e76ca69e756c9ad755f3f78c7196cd Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:12:32 -0500 Subject: [PATCH 06/24] update broken_tests.json --- tests/broken_tests.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 2af5168381b0..412cc3a4ede4 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -12,5 +12,11 @@ }, "02888_system_tables_with_inaccsessible_table_function": { "reason": "todo investigate" + }, + "03094_grouparraysorted_memory": { + "reason": "fails with tsan" + }, + "02700_s3_part_INT_MAX": { + "reason": "fails with asan" } } \ No newline at end of file From 7ed2719730e16551bf2150bc697fd8cf0e582cc1 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 14 Nov 2024 11:26:08 -0500 Subject: [PATCH 07/24] update hdfs image --- tests/integration/compose/docker_compose_hdfs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/compose/docker_compose_hdfs.yml b/tests/integration/compose/docker_compose_hdfs.yml index 1cae54ad9e1a..40a10df01f7c 100644 --- a/tests/integration/compose/docker_compose_hdfs.yml +++ b/tests/integration/compose/docker_compose_hdfs.yml @@ -1,7 +1,7 @@ version: '2.3' services: hdfs1: - image: sequenceiq/hadoop-docker:2.7.0 + image: prasanthj/docker-hadoop:2.6.0 hostname: hdfs1 restart: always expose: From 7fa57c3f05fd3417a1cc80afecde5f5cdc83b819 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:25:25 -0500 Subject: [PATCH 08/24] fix stateless test crossout --- tests/broken_tests.json | 2 +- tests/ci/functional_test_check.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 412cc3a4ede4..b0d374916c12 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -14,7 +14,7 @@ "reason": "todo investigate" }, "03094_grouparraysorted_memory": { - "reason": "fails with tsan" + "reason": "flaky" }, "02700_s3_part_INT_MAX": { "reason": "fails with asan" diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 14c07665642e..4cd022c6bf81 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -96,6 +96,8 @@ def get_run_command( f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " if "analyzer" not in check_name else "" + ) + volume_with_broken_test += ( f"--volume={repo_path}/tests/broken_tests.json:/broken_tests.json " ) From ee493a9fc32e77013c9d81859e1b5105959fa8b3 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 14:23:15 -0500 Subject: [PATCH 09/24] update broken tests list with upstream fails --- tests/broken_tests.json | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index b0d374916c12..a6da072c8b86 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -18,5 +18,41 @@ }, "02700_s3_part_INT_MAX": { "reason": "fails with asan" + }, + 
"test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000]":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000] ":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000]":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000]":{ + "reason": "not run by upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True] ":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[direct-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[flat-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[hashed-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py::test_simple_ssl[flat-True]":{ + "reason": "errors upstream" + }, + "test_storage_mongodb/test.py::test_secure_connection[True]":{ + "reason": "fails upstream" + }, + "test_table_function_mongodb/test.py::test_secure_connection[True]":{ + "reason": "fails upstream" } -} \ No newline at end of file +} From 3b136bcdd5164633151c701fb7c3c8da28bb4ae8 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 14:42:50 -0500 Subject: [PATCH 10/24] Adding automated report --- .github/create_combined_ci_report.py | 294 +++++++++++++++++++++++++ .github/workflows/release_branches.yml | 28 +++ 2 files changed, 322 insertions(+) create mode 100755 .github/create_combined_ci_report.py diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py new file mode 100755 index 000000000000..4d90171b03d4 --- /dev/null +++ b/.github/create_combined_ci_report.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from itertools import combinations +import json + +import requests +from clickhouse_driver import Client +import boto3 +from botocore.exceptions import NoCredentialsError + +DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" +DATABASE_USER_VAR = "CHECKS_DATABASE_USER" +DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" +S3_BUCKET = "altinity-build-artifacts" + + +def get_checks_fails(client: Client, job_url: str): + """ + Get tests that did not succeed for the given job URL. + Exclude checks that have status 'error' as they are counted in get_checks_errors. + """ + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status IN ('FAIL', 'ERROR') + AND check_status!='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): + """ + Get tests that are known to fail for the given job URL. 
+ """ + assert len(known_fails) > 0, "cannot query the database with empty known fails" + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY test_name, check_name + """ + + df = client.query_dataframe(query) + + df.insert( + len(df.columns) - 1, + "reason", + df["test_name"] + .cat.remove_unused_categories() + .apply( + lambda test_name: known_fails[test_name].get("reason", "No reason given") + ), + ) + + return df + + +def get_checks_errors(client: Client, job_url: str): + """ + Get checks that have status 'error' for the given job URL. + """ + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND check_status=='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def drop_prefix_rows(df, column_to_clean): + """ + Drop rows from the dataframe if: + - the row matches another row completely except for the specified column + - the specified column of that row is a prefix of the same column in another row + """ + to_drop = set() + reference_columns = [col for col in df.columns if col != column_to_clean] + for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): + if all(row_1[col] == row_2[col] for col in reference_columns): + if row_2[column_to_clean].startswith(row_1[column_to_clean]): + to_drop.add(i) + elif row_1[column_to_clean].startswith(row_2[column_to_clean]): + to_drop.add(j) + return df.drop(to_drop) + + +def get_regression_fails(client: Client, job_url: str): + """ + Get regression tests that did not succeed for the given job URL. + """ + # If you rename the alias for report_url, also update the formatters in format_results_as_html_table + # Nested SELECT handles test reruns + query = f"""SELECT arch, job_name, status, test_name, results_link + FROM ( + SELECT + architecture as arch, + test_name, + argMax(result, start_time) AS status, + job_url, + job_name, + report_url as results_link + FROM `gh-data`.clickhouse_regression_results + GROUP BY architecture, test_name, job_url, job_name, report_url, start_time + ORDER BY start_time DESC, length(test_name) DESC + ) + WHERE job_url='{job_url}' + AND status IN ('Fail', 'Error') + """ + df = client.query_dataframe(query) + df = drop_prefix_rows(df, "test_name") + df["job_name"] = df["job_name"].str.title() + return df + + +def url_to_html_link(url: str) -> str: + if not url: + return "" + text = url.split("/")[-1] + if not text: + text = "results" + return f'{text}' + + +def format_test_name_for_linewrap(text: str) -> str: + """Tweak the test name to improve line wrapping.""" + return text.replace(".py::", "/") + + +def format_results_as_html_table(results) -> str: + if len(results) == 0: + return "

<p>Nothing to report</p>
" + results.columns = [col.replace("_", " ").title() for col in results.columns] + html = ( + results.to_html( + index=False, + formatters={ + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + }, + escape=False, + ) # tbody/thead tags interfere with the table sorting script + .replace("\n", "") + .replace("\n", "") + .replace("\n", "") + .replace("\n", "") + .replace(' argparse.Namespace: + parser = argparse.ArgumentParser(description="Create a combined CI report.") + parser.add_argument( + "--actions-run-url", required=True, help="URL of the actions run" + ) + parser.add_argument( + "--pr-number", required=True, help="Pull request number for the S3 path" + ) + parser.add_argument( + "--commit-sha", required=True, help="Commit SHA for the S3 path" + ) + parser.add_argument( + "--no-upload", action="store_true", help="Do not upload the report" + ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) + parser.add_argument( + "--mark-preview", action="store_true", help="Mark the report as a preview" + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + db_client = Client( + host=os.getenv(DATABASE_HOST_VAR), + user=os.getenv(DATABASE_USER_VAR), + password=os.getenv(DATABASE_PASSWORD_VAR), + port=9440, + secure="y", + verify=False, + settings={"use_numpy": True}, + ) + + s3_path = ( + f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/" + ) + report_destination_url = s3_path + "combined_report.html" + ci_running_report_url = s3_path + "ci_running.html" + + response = requests.get(ci_running_report_url) + if response.status_code == 200: + ci_running_report: str = response.text + else: + print( + f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}" + ) + exit(1) + + fail_results = { + "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], + "checks_errors": get_checks_errors(db_client, args.actions_run_url), + "regression_fails": get_regression_fails(db_client, args.actions_run_url), + } + + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + + combined_report = ( + ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for") + .replace( + "
", + f"""

<h2>Table of Contents</h2>
+{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
+
+
+<h2>CI Jobs Status</h2>
+
""", + 1, + ) + .replace( + "
", + f""" + +

<h2>Checks Errors</h2>
+{format_results_as_html_table(fail_results['checks_errors'])}
+
+<h2>Checks New Fails</h2>
+{format_results_as_html_table(fail_results['checks_fails'])}
+
+<h2>Regression New Fails</h2>
+{format_results_as_html_table(fail_results['regression_fails'])}
+
+<h2>Checks Known Fails</h2>
+{format_results_as_html_table(fail_results['checks_known_fails'])} +""", + 1, + ) + ) + report_path = Path("combined_report.html") + report_path.write_text(combined_report, encoding="utf-8") + + if args.no_upload: + print(f"Report saved to {report_path}") + exit(0) + + # Upload the report to S3 + s3_client = boto3.client("s3") + + try: + s3_client.put_object( + Bucket=S3_BUCKET, + Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html", + Body=combined_report, + ContentType="text/html; charset=utf-8", + ) + except NoCredentialsError: + print("Credentials not available for S3 upload.") + + print(report_destination_url) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index b2495ac0f814..ee26102405a7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -549,3 +549,31 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + - name: Create and upload combined report + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') + if [[ -n $IS_VALID_URL ]]; then + echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + else + echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY + exit 1 + fi if: ${{ !cancelled() }} + env: + CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} + CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} + CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} + COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.number }} + ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} + shell: bash + run: | + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 + + REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) + + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') + if [[ -n $IS_VALID_URL ]]; then + echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + else + echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY + exit 1 + fi \ No newline at end of file From 23e18745c1877e0a0221ceaa882c45f97e2acf52 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:19:53 -0500 Subject: [PATCH 11/24] fix workflow error --- .github/workflows/release_branches.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index ee26102405a7..ccba3503b3e7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -550,13 +550,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py - name: Create and upload combined report - IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') - if [[ -n $IS_VALID_URL ]]; then - echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY - else - echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY - exit 1 - fi if: ${{ !cancelled() }} env: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} From eb4a33201d6b071b034756fa7a24b2f85e469c71 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Tue, 4 Mar 2025 08:11:44 -0500 Subject: [PATCH 12/24] Always run finishcheck --- 
.github/workflows/release_branches.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index ccba3503b3e7..6b19cab0cfdf 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -506,7 +506,7 @@ jobs: runner_type: altinity-style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: - DockerServerImage - DockerKeeperImage From 336b35be8681e465a4ff28d2c3e19f9131879e78 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Tue, 4 Mar 2025 11:55:53 -0500 Subject: [PATCH 13/24] fix extra space in broken test name --- tests/broken_tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index a6da072c8b86..d9d0008ef88f 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -34,7 +34,7 @@ "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000]":{ "reason": "not run by upstream" }, - "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True] ":{ + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True]": { "reason": "errors upstream" }, "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[direct-True]":{ From 16f0806b759521623ee280de06ec2ab4c71f48d0 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 5 Mar 2025 08:57:56 -0500 Subject: [PATCH 14/24] fix find and replace --- .github/create_combined_ci_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py index 4d90171b03d4..19c7a6c05472 100755 --- a/.github/create_combined_ci_report.py +++ b/.github/create_combined_ci_report.py @@ -231,7 +231,7 @@ def main(): ) combined_report = ( - ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for") + ci_running_report.replace("ClickHouse CI Running for", "Combined CI Report for") .replace( "", f"""

<h2>Table of Contents</h2>
From e0b41afa70284edbb78856b6928146cad3a7a2f6 Mon Sep 17 00:00:00 2001 From: Your Name <146047128+strtgbb@users.noreply.github.com> Date: Thu, 13 Mar 2025 22:33:58 -0400 Subject: [PATCH 15/24] update regression hash --- .github/workflows/release_branches.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6b19cab0cfdf..f6c3621831ba 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -472,7 +472,7 @@ jobs: secrets: inherit with: runner_type: altinity-type-cpx51, altinity-image-x86-app-docker-ce, altinity-setup-regression - commit: 53d73ed32155a8a17ee0d0cdb15aee96c98010a2 + commit: 0fdb555b36d0ea6a6affc5cf87e593b5d8944c0a arch: release build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout_minutes: 300 @@ -483,7 +483,7 @@ jobs: secrets: inherit with: runner_type: altinity-type-cax41, altinity-image-arm-app-docker-ce, altinity-setup-regression - commit: 53d73ed32155a8a17ee0d0cdb15aee96c98010a2 + commit: 0fdb555b36d0ea6a6affc5cf87e593b5d8944c0a arch: aarch64 build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout_minutes: 300 From c6828583992efff1097aafe3c864f44eb6909d60 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:11:51 -0400 Subject: [PATCH 16/24] add grype scan --- .github/workflows/release_branches.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e8540026371d..e962e1391ce7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -168,6 +168,23 @@ jobs: test_name: Docker keeper image runner_type: altinity-func-tester data: ${{ needs.RunConfig.outputs.data }} + GrypeScan: + needs: [RunConfig, DockerServerImage, DockerKeeperImage] + if: ${{ !failure() && !cancelled() }} + strategy: + fail-fast: false + matrix: + include: + - image: server + suffix: '' + - image: server + suffix: '-alpine' + - image: keeper + suffix: '' + uses: ./.github/workflows/grype_scan.yml@antalya + secrets: inherit + with: + docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ @@ -555,7 +572,7 @@ jobs: CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - PR_NUMBER: ${{ github.event.number }} + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | From 45779003d7141ad171a611ea39ab74adfcf31aec Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:20:05 -0400 Subject: [PATCH 17/24] update report to new format --- .github/create_combined_ci_report.py | 294 ------------ 
.github/create_workflow_report.py | 631 +++++++++++++++++++++++++ .github/workflows/release_branches.yml | 4 +- 3 files changed, 633 insertions(+), 296 deletions(-) delete mode 100755 .github/create_combined_ci_report.py create mode 100755 .github/create_workflow_report.py diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py deleted file mode 100755 index 19c7a6c05472..000000000000 --- a/.github/create_combined_ci_report.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -from pathlib import Path -from itertools import combinations -import json - -import requests -from clickhouse_driver import Client -import boto3 -from botocore.exceptions import NoCredentialsError - -DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" -DATABASE_USER_VAR = "CHECKS_DATABASE_USER" -DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" -S3_BUCKET = "altinity-build-artifacts" - - -def get_checks_fails(client: Client, job_url: str): - """ - Get tests that did not succeed for the given job URL. - Exclude checks that have status 'error' as they are counted in get_checks_errors. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status IN ('FAIL', 'ERROR') - AND check_status!='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): - """ - Get tests that are known to fail for the given job URL. - """ - assert len(known_fails) > 0, "cannot query the database with empty known fails" - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status='BROKEN' - AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) - ORDER BY test_name, check_name - """ - - df = client.query_dataframe(query) - - df.insert( - len(df.columns) - 1, - "reason", - df["test_name"] - .cat.remove_unused_categories() - .apply( - lambda test_name: known_fails[test_name].get("reason", "No reason given") - ), - ) - - return df - - -def get_checks_errors(client: Client, job_url: str): - """ - Get checks that have status 'error' for the given job URL. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND check_status=='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def drop_prefix_rows(df, column_to_clean): - """ - Drop rows from the dataframe if: - - the row matches another row completely except for the specified column - - the specified column of that row is a prefix of the same column in another row - """ - to_drop = set() - reference_columns = [col for col in df.columns if col != column_to_clean] - for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): - if all(row_1[col] == row_2[col] for col in reference_columns): - if row_2[column_to_clean].startswith(row_1[column_to_clean]): - to_drop.add(i) - elif row_1[column_to_clean].startswith(row_2[column_to_clean]): - to_drop.add(j) - return df.drop(to_drop) - - -def get_regression_fails(client: Client, job_url: str): - """ - Get regression tests that did not succeed for the given job URL. 
- """ - # If you rename the alias for report_url, also update the formatters in format_results_as_html_table - # Nested SELECT handles test reruns - query = f"""SELECT arch, job_name, status, test_name, results_link - FROM ( - SELECT - architecture as arch, - test_name, - argMax(result, start_time) AS status, - job_url, - job_name, - report_url as results_link - FROM `gh-data`.clickhouse_regression_results - GROUP BY architecture, test_name, job_url, job_name, report_url, start_time - ORDER BY start_time DESC, length(test_name) DESC - ) - WHERE job_url='{job_url}' - AND status IN ('Fail', 'Error') - """ - df = client.query_dataframe(query) - df = drop_prefix_rows(df, "test_name") - df["job_name"] = df["job_name"].str.title() - return df - - -def url_to_html_link(url: str) -> str: - if not url: - return "" - text = url.split("/")[-1] - if not text: - text = "results" - return f'{text}' - - -def format_test_name_for_linewrap(text: str) -> str: - """Tweak the test name to improve line wrapping.""" - return text.replace(".py::", "/") - - -def format_results_as_html_table(results) -> str: - if len(results) == 0: - return "

<p>Nothing to report</p>
" - results.columns = [col.replace("_", " ").title() for col in results.columns] - html = ( - results.to_html( - index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - }, - escape=False, - ) # tbody/thead tags interfere with the table sorting script - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace('
argparse.Namespace: - parser = argparse.ArgumentParser(description="Create a combined CI report.") - parser.add_argument( - "--actions-run-url", required=True, help="URL of the actions run" - ) - parser.add_argument( - "--pr-number", required=True, help="Pull request number for the S3 path" - ) - parser.add_argument( - "--commit-sha", required=True, help="Commit SHA for the S3 path" - ) - parser.add_argument( - "--no-upload", action="store_true", help="Do not upload the report" - ) - parser.add_argument( - "--known-fails", type=str, help="Path to the file with known fails" - ) - parser.add_argument( - "--mark-preview", action="store_true", help="Mark the report as a preview" - ) - return parser.parse_args() - - -def main(): - args = parse_args() - - db_client = Client( - host=os.getenv(DATABASE_HOST_VAR), - user=os.getenv(DATABASE_USER_VAR), - password=os.getenv(DATABASE_PASSWORD_VAR), - port=9440, - secure="y", - verify=False, - settings={"use_numpy": True}, - ) - - s3_path = ( - f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/" - ) - report_destination_url = s3_path + "combined_report.html" - ci_running_report_url = s3_path + "ci_running.html" - - response = requests.get(ci_running_report_url) - if response.status_code == 200: - ci_running_report: str = response.text - else: - print( - f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}" - ) - exit(1) - - fail_results = { - "checks_fails": get_checks_fails(db_client, args.actions_run_url), - "checks_known_fails": [], - "checks_errors": get_checks_errors(db_client, args.actions_run_url), - "regression_fails": get_regression_fails(db_client, args.actions_run_url), - } - - if args.known_fails: - if not os.path.exists(args.known_fails): - print(f"Known fails file {args.known_fails} not found.") - exit(1) - - with open(args.known_fails) as f: - known_fails = json.load(f) - - if known_fails: - fail_results["checks_known_fails"] = get_checks_known_fails( - db_client, args.actions_run_url, known_fails - ) - - combined_report = ( - ci_running_report.replace("ClickHouse CI Running for", "Combined CI Report for") - .replace( - "
", - f"""

<h2>Table of Contents</h2>
-{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
-
-
-<h2>CI Jobs Status</h2>
-
""", - 1, - ) - .replace( - "
", - f""" - -

<h2>Checks Errors</h2>
-{format_results_as_html_table(fail_results['checks_errors'])}
-
-<h2>Checks New Fails</h2>
-{format_results_as_html_table(fail_results['checks_fails'])}
-
-<h2>Regression New Fails</h2>
-{format_results_as_html_table(fail_results['regression_fails'])}
-
-<h2>Checks Known Fails</h2>
-{format_results_as_html_table(fail_results['checks_known_fails'])} -""", - 1, - ) - ) - report_path = Path("combined_report.html") - report_path.write_text(combined_report, encoding="utf-8") - - if args.no_upload: - print(f"Report saved to {report_path}") - exit(0) - - # Upload the report to S3 - s3_client = boto3.client("s3") - - try: - s3_client.put_object( - Bucket=S3_BUCKET, - Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html", - Body=combined_report, - ContentType="text/html; charset=utf-8", - ) - except NoCredentialsError: - print("Credentials not available for S3 upload.") - - print(report_destination_url) - - -if __name__ == "__main__": - main() diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py new file mode 100755 index 000000000000..4257925b826d --- /dev/null +++ b/.github/create_workflow_report.py @@ -0,0 +1,631 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from itertools import combinations +import json +from datetime import datetime + +import requests +import pandas as pd +from clickhouse_driver import Client +import boto3 +from botocore.exceptions import NoCredentialsError +import pandas as pd + +DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" +DATABASE_USER_VAR = "CHECKS_DATABASE_USER" +DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" +S3_BUCKET = "altinity-build-artifacts" + + +css = """ + /* Base colors for Altinity */ + :root { + --altinity-background: #000D45; + --altinity-accent: #189DCF; + --altinity-highlight: #FFC600; + --altinity-gray: #6c757d; + --altinity-light-gray: #f8f9fa; + --altinity-white: #ffffff; + } + + /* Body and heading fonts */ + body { + font-family: Arimo, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 1rem; + background-color: var(--altinity-background); + color: var(--altinity-light-gray); + padding: 2rem; + } + + h1, h2, h3, h4, h5, h6 { + font-family: Figtree, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; + color: var(--altinity-white); + } + + .logo { + width: auto; + height: 5em; + } + + /* General table styling */ + table { + min-width: min(900px, 98vw); + margin: 1rem 0; + border-collapse: collapse; + background-color: var(--altinity-white); + border: 1px solid var(--altinity-accent); + box-shadow: 0 0 8px rgba(0, 0, 0, 0.05); + color: var(--altinity-background); + } + + /* Table header styling */ + th { + background-color: var(--altinity-accent); + color: var(--altinity-white); + padding: 10px 16px; + text-align: left; + border: none; + border-bottom: 2px solid var(--altinity-background); + white-space: nowrap; + } + th.hth { + border-bottom: 1px solid var(--altinity-accent); + border-right: 2px solid var(--altinity-background); + } + + /* Table header sorting styling */ + th { + cursor: pointer; + } + th.no-sort { + pointer-events: none; + } + th::after, + th::before { + transition: color 0.2s ease-in-out; + font-size: 1.2em; + color: transparent; + } + th::after { + margin-left: 3px; + content: '\\025B8'; + } + th:hover::after { + color: inherit; + } + th.dir-d::after { + color: inherit; + content: '\\025BE'; + } + th.dir-u::after { + color: inherit; + content: '\\025B4'; + } + + /* Table body row styling */ + tr:hover { + background-color: var(--altinity-light-gray); + } + + /* Table cell styling */ + td { + padding: 8px 8px; + border: 1px solid var(--altinity-accent); + } + + /* Link styling */ + a { + color: var(--altinity-accent); + text-decoration: none; + } + a:hover { + color: var(--altinity-highlight); + 
text-decoration: underline; + } +""" + +script = """ + +""" + +logo = """ +

+""" + + +def get_commit_statuses(sha: str) -> pd.DataFrame: + """ + Fetch commit statuses for a given SHA and return as a pandas DataFrame. + + Args: + sha (str): Commit SHA to fetch statuses for. + + Returns: + pd.DataFrame: DataFrame containing all statuses. + """ + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/Altinity/ClickHouse/commits/{sha}/statuses" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch statuses: {response.status_code} {response.text}" + ) + + data = response.json() + + # Parse relevant fields + parsed = [ + { + "job_name": item["context"], + "job_status": item["state"], + "message": item["description"], + "results_link": item["target_url"], + } + for item in data + ] + + return ( + pd.DataFrame(parsed) + .sort_values(by=["job_status", "job_name"], ascending=[True, True]) + .reset_index(drop=True) + ) + + +def get_pr_info_from_number(pr_number: str) -> dict: + """ + Fetch pull request information for a given PR number. + + Args: + pr_number (str): Pull request number to fetch information for. + + Returns: + dict: Dictionary containing PR information. + """ + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/Altinity/ClickHouse/pulls/{pr_number}" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch pull request info: {response.status_code} {response.text}" + ) + + return response.json() + + +def get_checks_fails(client: Client, job_url: str): + """ + Get tests that did not succeed for the given job URL. + Exclude checks that have status 'error' as they are counted in get_checks_errors. + """ + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND test_status IN ('FAIL', 'ERROR') + AND check_status!='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): + """ + Get tests that are known to fail for the given job URL. + """ + assert len(known_fails) > 0, "cannot query the database with empty known fails" + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY test_name, check_name + """ + + df = client.query_dataframe(query) + + df.insert( + len(df.columns) - 1, + "reason", + df["test_name"] + .astype(str) + .apply( + lambda test_name: known_fails[test_name].get("reason", "No reason given") + ), + ) + + return df + + +def get_checks_errors(client: Client, job_url: str): + """ + Get checks that have status 'error' for the given job URL. 
+ """ + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND check_status=='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def drop_prefix_rows(df, column_to_clean): + """ + Drop rows from the dataframe if: + - the row matches another row completely except for the specified column + - the specified column of that row is a prefix of the same column in another row + """ + to_drop = set() + reference_columns = [col for col in df.columns if col != column_to_clean] + for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): + if all(row_1[col] == row_2[col] for col in reference_columns): + if row_2[column_to_clean].startswith(row_1[column_to_clean]): + to_drop.add(i) + elif row_1[column_to_clean].startswith(row_2[column_to_clean]): + to_drop.add(j) + return df.drop(to_drop) + + +def get_regression_fails(client: Client, job_url: str): + """ + Get regression tests that did not succeed for the given job URL. + """ + # If you rename the alias for report_url, also update the formatters in format_results_as_html_table + # Nested SELECT handles test reruns + query = f"""SELECT arch, job_name, status, test_name, results_link + FROM ( + SELECT + architecture as arch, + test_name, + argMax(result, start_time) AS status, + job_url, + job_name, + report_url as results_link + FROM `gh-data`.clickhouse_regression_results + GROUP BY architecture, test_name, job_url, job_name, report_url + ORDER BY length(test_name) DESC + ) + WHERE job_url='{job_url}' + AND status IN ('Fail', 'Error') + """ + df = client.query_dataframe(query) + df = drop_prefix_rows(df, "test_name") + df["job_name"] = df["job_name"].str.title() + return df + + +def get_cves(pr_number, commit_sha): + s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) + s3_prefix = f"{pr_number}/{commit_sha}/grype/" + + results = [] + + response = s3_client.list_objects_v2( + Bucket=S3_BUCKET, Prefix=s3_prefix, Delimiter="/" + ) + grype_result_dirs = [ + content["Prefix"] for content in response.get("CommonPrefixes", []) + ] + + for path in grype_result_dirs: + file_key = f"{path}result.json" + file_response = s3_client.get_object(Bucket=S3_BUCKET, Key=file_key) + content = file_response["Body"].read().decode("utf-8") + results.append(json.loads(content)) + + rows = [] + for scan_result in results: + for match in scan_result["matches"]: + rows.append( + { + "docker_image": scan_result["source"]["target"]["userInput"], + "severity": match["vulnerability"]["severity"], + "identifier": match["vulnerability"]["id"], + "namespace": match["vulnerability"]["namespace"], + } + ) + + if len(rows) == 0: + return pd.DataFrame() + + df = pd.DataFrame(rows).drop_duplicates() + df = df.sort_values( + by="severity", + key=lambda col: col.str.lower().map( + {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} + ), + ) + return df + + +def url_to_html_link(url: str) -> str: + if not url: + return "" + text = url.split("/")[-1] + if not text: + text = "results" + return f'{text}' + + +def format_test_name_for_linewrap(text: str) -> str: + """Tweak the test name to improve line wrapping.""" + return text.replace(".py::", "/") + + +def format_test_status(text: str) -> str: + """Format the test status for better readability.""" + color = ( + "red" + if text.lower().startswith("fail") + else "orange" if text.lower() in ("error", "broken") else "green" + ) 
+    return f'<span style="color: {color}">{text}</span>'
+
+
+def format_results_as_html_table(results) -> str:
+    if len(results) == 0:
+        return "<p>Nothing to report</p>
" + results.columns = [col.replace("_", " ").title() for col in results.columns] + html = results.to_html( + index=False, + formatters={ + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + }, + escape=False, + ).replace(' border="1"', "") + return html + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Create a combined CI report.") + parser.add_argument( + "--actions-run-url", required=True, help="URL of the actions run" + ) + parser.add_argument( + "--pr-number", required=True, help="Pull request number for the S3 path" + ) + parser.add_argument( + "--commit-sha", required=True, help="Commit SHA for the S3 path" + ) + parser.add_argument( + "--no-upload", action="store_true", help="Do not upload the report" + ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) + parser.add_argument( + "--cves", action="store_true", help="Get CVEs from Grype results" + ) + parser.add_argument( + "--mark-preview", action="store_true", help="Mark the report as a preview" + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + db_client = Client( + host=os.getenv(DATABASE_HOST_VAR), + user=os.getenv(DATABASE_USER_VAR), + password=os.getenv(DATABASE_PASSWORD_VAR), + port=9440, + secure="y", + verify=False, + settings={"use_numpy": True}, + ) + + fail_results = { + "job_statuses": get_commit_statuses(args.commit_sha), + "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], + "checks_errors": get_checks_errors(db_client, args.actions_run_url), + "regression_fails": get_regression_fails(db_client, args.actions_run_url), + "docker_images_cves": ( + [] if not args.cves else get_cves(args.pr_number, args.commit_sha) + ), + } + + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + + if args.pr_number == "0": + pr_info_html = "Release" + else: + try: + pr_info = get_pr_info_from_number(args.pr_number) + pr_info_html = f""" + #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} + """ + except Exception as e: + pr_info_html = e + + high_cve_count = 0 + if len(fail_results["docker_images_cves"]) > 0: + high_cve_count = ( + fail_results["docker_images_cves"]["severity"] + .str.lower() + .isin(("high", "critical")) + .sum() + ) + + title = "ClickHouse® CI Workflow Run Report" + + html_report = f""" + + + + + + + {title} + + + {logo} +

{title}

+ + + + + + + + + + + + + +
Pull Request{pr_info_html}
Workflow Run{args.actions_run_url.split('/')[-1]}
Commit{args.commit_sha}
Date{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC
+ +

<h2>Table of Contents</h2>
+{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
+
+
+<h2>CI Jobs Status</h2>
+{format_results_as_html_table(fail_results['job_statuses'])}
+
+<h2>Checks Errors</h2>
+{format_results_as_html_table(fail_results['checks_errors'])}
+
+<h2>Checks New Fails</h2>
+{format_results_as_html_table(fail_results['checks_fails'])}
+
+<h2>Regression New Fails</h2>
+{format_results_as_html_table(fail_results['regression_fails'])}
+
+<h2>Docker Images CVEs</h2>
+{"<p>Not Checked</p>" if not args.cves else format_results_as_html_table(fail_results['docker_images_cves'])}
+
+<h2>Checks Known Fails</h2>
+{"<p>Not Checked</p>
" if not args.known_fails else format_results_as_html_table(fail_results['checks_known_fails'])} + +{script} + + +""" + report_name = "ci_run_report.html" + report_path = Path(report_name) + report_path.write_text(html_report, encoding="utf-8") + + if args.no_upload: + print(f"Report saved to {report_path}") + exit(0) + + report_destination_key = f"{args.pr_number}/{args.commit_sha}/{report_name}" + + # Upload the report to S3 + s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) + + try: + s3_client.put_object( + Bucket=S3_BUCKET, + Key=report_destination_key, + Body=html_report, + ContentType="text/html; charset=utf-8", + ) + except NoCredentialsError: + print("Credentials not available for S3 upload.") + + print(f"https://s3.amazonaws.com/{S3_BUCKET}/" + report_destination_key) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e962e1391ce7..2a2eeecc0fad 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -576,9 +576,9 @@ jobs: ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | - pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 - REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) + REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then From c12ca0ed5f58c2e4aec99ffb4f1fc0e1e5fda8bd Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:25:36 -0400 Subject: [PATCH 18/24] fix 'cannot specify version when calling local workflows' --- .github/grype/parse_vulnerabilities_grype.py | 32 +++++ .github/grype/run_grype_scan.sh | 18 +++ .../grype/transform_and_upload_results_s3.sh | 13 ++ .github/workflows/grype_scan.yml | 131 ++++++++++++++++++ .github/workflows/release_branches.yml | 2 +- 5 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 .github/grype/parse_vulnerabilities_grype.py create mode 100755 .github/grype/run_grype_scan.sh create mode 100755 .github/grype/transform_and_upload_results_s3.sh create mode 100644 .github/workflows/grype_scan.yml diff --git a/.github/grype/parse_vulnerabilities_grype.py b/.github/grype/parse_vulnerabilities_grype.py new file mode 100644 index 000000000000..fec2ef3bfac7 --- /dev/null +++ b/.github/grype/parse_vulnerabilities_grype.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +import json + +from testflows.core import * + +xfails = {} + + +@Name("docker vulnerabilities") +@XFails(xfails) +@TestModule +def docker_vulnerabilities(self): + with Given("I gather grype scan results"): + with open("./result.json", "r") as f: + results = json.load(f) + + for vulnerability in results["matches"]: + with Test( + f"{vulnerability['vulnerability']['id']}@{vulnerability['vulnerability']['namespace']},{vulnerability['vulnerability']['severity']}", + flags=TE, + ): + note(vulnerability) + critical_levels = set(["HIGH", "CRITICAL"]) + if vulnerability['vulnerability']["severity"].upper() in critical_levels: + with Then( + f"Found vulnerability of 
{vulnerability['vulnerability']['severity']} severity" + ): + result(Fail) + + +if main(): + docker_vulnerabilities() diff --git a/.github/grype/run_grype_scan.sh b/.github/grype/run_grype_scan.sh new file mode 100755 index 000000000000..c5ce0b1b10d3 --- /dev/null +++ b/.github/grype/run_grype_scan.sh @@ -0,0 +1,18 @@ +set -x +set -e + +IMAGE=$1 + +GRYPE_VERSION="v0.80.1" + +docker pull $IMAGE +docker pull anchore/grype:${GRYPE_VERSION} + +docker run \ + --rm --volume /var/run/docker.sock:/var/run/docker.sock \ + --name Grype anchore/grype:${GRYPE_VERSION} \ + --scope all-layers \ + -o json \ + $IMAGE > result.json + +ls -sh diff --git a/.github/grype/transform_and_upload_results_s3.sh b/.github/grype/transform_and_upload_results_s3.sh new file mode 100755 index 000000000000..7a10b02887ef --- /dev/null +++ b/.github/grype/transform_and_upload_results_s3.sh @@ -0,0 +1,13 @@ +DOCKER_IMAGE=$(echo "$DOCKER_IMAGE" | sed 's/[\/:]/_/g') + +S3_PATH="s3://$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +HTTPS_S3_PATH="https://s3.amazonaws.com/$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +echo "https_s3_path=$HTTPS_S3_PATH" >> $GITHUB_OUTPUT + +tfs --no-colors transform nice raw.log nice.log.txt +tfs --no-colors report results -a $HTTPS_S3_PATH raw.log - --copyright "Altinity LTD" | tfs --no-colors document convert > results.html + +aws s3 cp --no-progress nice.log.txt $S3_PATH/nice.log.txt --content-type "text/plain; charset=utf-8" || echo "nice log file not found". +aws s3 cp --no-progress results.html $S3_PATH/results.html || echo "results file not found". +aws s3 cp --no-progress raw.log $S3_PATH/raw.log || echo "raw.log file not found". +aws s3 cp --no-progress result.json $S3_PATH/result.json --content-type "text/plain; charset=utf-8" || echo "result.json not found". \ No newline at end of file diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml new file mode 100644 index 000000000000..1414129fd666 --- /dev/null +++ b/.github/workflows/grype_scan.yml @@ -0,0 +1,131 @@ +name: Grype Scan +run-name: Grype Scan ${{ inputs.docker_image }} + +on: + workflow_dispatch: + # Inputs for manual run + inputs: + docker_image: + description: 'Docker image. If no tag, it will be determined by version_helper.py' + required: true + workflow_call: + # Inputs for workflow call + inputs: + docker_image: + description: 'Docker image. 
If no tag, it will be determined by version_helper.py' + required: true + type: string +env: + PYTHONUNBUFFERED: 1 + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + grype_scan: + name: Grype Scan + runs-on: [self-hosted, altinity-on-demand, altinity-func-tester-aarch64] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker + uses: docker/setup-buildx-action@v3 + + - name: Set up Python + run: | + export TESTFLOWS_VERSION="2.4.19" + sudo apt-get update + sudo apt-get install -y python3-pip python3-venv + python3 -m venv venv + source venv/bin/activate + pip install --upgrade requests chardet urllib3 + pip install testflows==$TESTFLOWS_VERSION awscli==1.33.28 + echo PATH=$PATH >>$GITHUB_ENV + + - name: Set image tag if not given + if: ${{ !contains(inputs.docker_image, ':') }} + id: set_version + run: | + python3 ./tests/ci/version_helper.py | tee /tmp/version_info + source /tmp/version_info + echo "docker_image=${{ inputs.docker_image }}:${{ github.event.pull_request.number || 0 }}-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT + echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT + + - name: Run Grype Scan + run: | + DOCKER_IMAGE=${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + ./.github/grype/run_grype_scan.sh $DOCKER_IMAGE + + - name: Parse grype results + run: | + python3 -u ./.github/grype/parse_vulnerabilities_grype.py -o nice --no-colors --log raw.log --test-to-end + + - name: Transform and Upload Grype Results + if: always() + id: upload_results + env: + S3_BUCKET: "altinity-build-artifacts" + COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} + DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + run: | + ./.github/grype/transform_and_upload_results_s3.sh + + - name: Create step summary + if: always() + id: create_summary + run: | + jq -r '.distro | "**Distro**: \(.name):\(.version)"' result.json >> $GITHUB_STEP_SUMMARY + if jq -e '.matches | length == 0' result.json > /dev/null; then + echo "No CVEs" >> $GITHUB_STEP_SUMMARY + else + echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY + echo "|------------|-------|" >> $GITHUB_STEP_SUMMARY + jq -r ' + .matches | + map(.vulnerability.severity) | + group_by(.) | + map({severity: .[0], count: length}) | + sort_by(.severity) | + map("| \(.severity) | \(.count) |") | + .[] + ' result.json >> $GITHUB_STEP_SUMMARY + fi + + HIGH_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. == "High")) | length' result.json) + CRITICAL_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. 
== "Critical")) | length' result.json) + TOTAL_HIGH_CRITICAL=$((HIGH_COUNT + CRITICAL_COUNT)) + echo "total_high_critical=$TOTAL_HIGH_CRITICAL" >> $GITHUB_OUTPUT + + if [ $TOTAL_HIGH_CRITICAL -gt 0 ]; then + echo '## High and Critical vulnerabilities found' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat raw.log | tfs --no-colors show tests | grep -Pi 'High|Critical' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + fi + + - name: Set commit status + if: always() + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: '${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}', + state: '${{ steps.create_summary.outputs.total_high_critical > 0 && 'failure' || 'success' }}', + target_url: '${{ steps.upload_results.outputs.https_s3_path }}/results.html', + description: 'Grype Scan Completed with ${{ steps.create_summary.outputs.total_high_critical }} high/critical vulnerabilities', + context: 'Grype Scan ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}' + }) + + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: grype-results-${{ hashFiles('raw.log') }} + path: | + result.json + nice.log.txt diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 2a2eeecc0fad..870945efee78 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -181,7 +181,7 @@ jobs: suffix: '-alpine' - image: keeper suffix: '' - uses: ./.github/workflows/grype_scan.yml@antalya + uses: ./.github/workflows/grype_scan.yml secrets: inherit with: docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} From 9f27dca882f2081aabc5c6d20d0e0ed38cb1d5a5 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Thu, 17 Apr 2025 14:23:37 -0400 Subject: [PATCH 19/24] add missing GITHUB_TOKEN --- .github/workflows/release_branches.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 479823f54628..166421d818d5 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -573,6 +573,7 @@ jobs: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} From 377556249326ba1b3f0b72602ffbe0118b373e78 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 18 Apr 2025 09:55:48 -0400 Subject: [PATCH 20/24] Add matrix values to job name attr in regression --- .github/workflows/regression.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 1142a1310867..96692fb49174 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -179,7 
+179,7 @@ jobs: python3 -u ${{ env.SUITE }}/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.SUITE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -243,7 +243,7 @@ jobs: -u alter/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} --only "/alter/${{ matrix.ONLY }} partition/*" - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.ONLY }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -314,7 +314,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -436,7 +436,7 @@ jobs: python3 -u ${{ env.SUITE }}/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" 
repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.SUITE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -562,7 +562,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -633,7 +633,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -703,7 +703,7 @@ jobs: --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} --with-${{ matrix.STORAGE }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT 
job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE From cd66e5e046a779d7976c3f204d228b549483c0d6 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 18 Apr 2025 11:23:58 -0400 Subject: [PATCH 21/24] Don't halt on suspicious, but not sensitive, strings. Too many false positives --- tests/ci/s3_helper.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index a473a108acd6..71545eca643b 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -20,14 +20,16 @@ S3_URL, ) -sensitive_var_pattern = re.compile( - r"\b[A-Z_]*(? Date: Sat, 19 Apr 2025 17:18:41 -0400 Subject: [PATCH 22/24] support pagination to fix missing CI jobs status --- .github/create_workflow_report.py | 37 ++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index 4257925b826d..7db00ddd8f44 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -198,6 +198,7 @@ def get_commit_statuses(sha: str) -> pd.DataFrame: """ Fetch commit statuses for a given SHA and return as a pandas DataFrame. + Handles pagination to get all statuses. Args: sha (str): Commit SHA to fetch statuses for. 
@@ -210,15 +211,35 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
         "Accept": "application/vnd.github.v3+json",
     }
 
-    url = f"https://api.github.com/repos/Altinity/ClickHouse/commits/{sha}/statuses"
-    response = requests.get(url, headers=headers)
+    url = f"https://api.github.com/repos/{GITHUB_REPO}/commits/{sha}/statuses"
 
-    if response.status_code != 200:
-        raise Exception(
-            f"Failed to fetch statuses: {response.status_code} {response.text}"
-        )
+    all_data = []
+
+    while url:
+        response = requests.get(url, headers=headers)
+
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to fetch statuses: {response.status_code} {response.text}"
+            )
+
+        data = response.json()
+        all_data.extend(data)
+
+        # Check for pagination links in the response headers
+        if "Link" in response.headers:
+            links = response.headers["Link"].split(",")
+            next_url = None
+
+            for link in links:
+                parts = link.strip().split(";")
+                if len(parts) == 2 and 'rel="next"' in parts[1]:
+                    next_url = parts[0].strip("<>")
+                    break
-    data = response.json()
+            url = next_url
+        else:
+            url = None
 
     # Parse relevant fields
     parsed = [
@@ -228,7 +249,7 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
             "message": item["description"],
             "results_link": item["target_url"],
         }
-        for item in data
+        for item in all_data
     ]
 
     return (

From 70d617a0ddd91f43790f508f1c44b6af2ab21542 Mon Sep 17 00:00:00 2001
From: strtgbb <146047128+strtgbb@users.noreply.github.com>
Date: Tue, 22 Apr 2025 08:41:03 -0400
Subject: [PATCH 23/24] try to fix branch filter

---
 .github/workflows/release_branches.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 166421d818d5..6fb776b11c8f 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -25,7 +25,8 @@ on:  # yamllint disable-line rule:truthy
       - prereleased
   push:
     branches:
-      - '**/24.3*'
+      - 'releases/24.3*'
+      - 'customizations/24.3*'
     tags:
       - '*'
   workflow_dispatch:

From e391aa90e284daf881ed523f8c67f2dfc73463c1 Mon Sep 17 00:00:00 2001
From: strtgbb <146047128+strtgbb@users.noreply.github.com>
Date: Thu, 24 Apr 2025 11:16:46 -0400
Subject: [PATCH 24/24] fix usage of GITHUB_REPO in report generation

---
 .github/create_workflow_report.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py
index 7db00ddd8f44..618ee98988fd 100755
--- a/.github/create_workflow_report.py
+++ b/.github/create_workflow_report.py
@@ -17,6 +17,7 @@
 DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
 DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
 S3_BUCKET = "altinity-build-artifacts"
+GITHUB_REPO = "Altinity/ClickHouse"
 
 css = """
@@ -274,7 +275,7 @@ def get_pr_info_from_number(pr_number: str) -> dict:
         "Accept": "application/vnd.github.v3+json",
     }
 
-    url = f"https://api.github.com/repos/Altinity/ClickHouse/pulls/{pr_number}"
+    url = f"https://api.github.com/repos/{GITHUB_REPO}/pulls/{pr_number}"
     response = requests.get(url, headers=headers)
 
     if response.status_code != 200:
@@ -544,7 +545,7 @@ def main():
     else:
         try:
             pr_info = get_pr_info_from_number(args.pr_number)
-            pr_info_html = f"""
+            pr_info_html = f"""
             #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')})
             {pr_info.get("title")}
             """
         except Exception as e:
@@ -582,7 +583,7 @@ def main():
             Workflow Run{args.actions_run_url.split('/')[-1]}
-            Commit{args.commit_sha}
+            Commit{args.commit_sha}
             Date{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC
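The final patch stops hard-coding the Altinity/ClickHouse slug in each URL the report builds and reuses a single GITHUB_REPO constant instead. A minimal sketch of that pattern, for reference only; the GITHUB_REPOSITORY environment fallback and the helper names are assumptions made for illustration, not something the patch itself adds.

# Sketch only: one repo slug shared by every URL builder in the report script.
import os

GITHUB_REPO = os.getenv("GITHUB_REPOSITORY", "Altinity/ClickHouse")


def pr_api_url(pr_number: int) -> str:
    return f"https://api.github.com/repos/{GITHUB_REPO}/pulls/{pr_number}"


def statuses_api_url(sha: str) -> str:
    return f"https://api.github.com/repos/{GITHUB_REPO}/commits/{sha}/statuses"


def commit_html_url(sha: str) -> str:
    # Used for the "Commit" row of the report header.
    return f"https://github.com/{GITHUB_REPO}/commit/{sha}"

Keeping every URL builder on the same constant means a fork only has to change one value for the API calls and the HTML links to stay consistent.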