From 91ce6889d2715e37c95d6323b984fe38642e6c54 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 30 Oct 2024 13:11:59 -0400 Subject: [PATCH 01/24] a universal approach to crossing out failed tests --- tests/broken_tests.json | 16 ++++++++++++++++ tests/ci/report.py | 27 ++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/broken_tests.json diff --git a/tests/broken_tests.json b/tests/broken_tests.json new file mode 100644 index 000000000000..28ffc977d28b --- /dev/null +++ b/tests/broken_tests.json @@ -0,0 +1,16 @@ +{ + "test_replicated_merge_tree_replicated_db_ttl/test.py::test_replicated_db_and_ttl": { + "message": "DB::Exception: Replicated is an experimental database engine.", + "reason": "Will not work without allow_experimental_database_replicated=1" + }, + "test_storage_s3_queue/test.py::test_upgrade": { + "message": "DB::Exception: S3Queue is experimental.", + "reason": "Will not work without allow_experimental_s3queue=1" + }, + "02920_alter_column_of_projections": { + "reason": "requires different settings" + }, + "02888_system_tables_with_inaccsessible_table_function": { + "reason": "todo investigate" + } +} \ No newline at end of file diff --git a/tests/ci/report.py b/tests/ci/report.py index a3c9b53637a9..4bc92014c27f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH +from env_helper import REPORT_PATH, TEMP_PATH, ROOT_DIR logger = logging.getLogger(__name__) @@ -325,6 +325,19 @@ def path_converter(obj): def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults + + broken_tests_config_path = f"{ROOT_DIR}/tests/broken_tests.json" + if ( + os.path.isfile(broken_tests_config_path) + and os.path.getsize(broken_tests_config_path) > 0 + ): + with open(broken_tests_config_path, "r", encoding="utf-8") as broken_tests_file: + broken_tests = json.load( + broken_tests_file + ) # type: Dict[str, Dict[str, str]] + else: + broken_tests = {} + with open(results_path, "r", encoding="utf-8") as descriptor: reader = csv.reader(descriptor, delimiter="\t") for line in reader: @@ -351,6 +364,18 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes else: result.set_log_files(line[3]) + if name in broken_tests.keys() and status == "FAIL": + fail_message = broken_tests[name].get("message", "") + if result.log_files and fail_message: + for log_path in result.log_files: + if log_path.endswith(".log"): + with open(log_path) as log_file: + if fail_message in log_file.read(): + result.status = "XFAIL" + break + else: + result.status = "XFAIL" + results.append(result) return results From 8405250d573a42d9a1d3723abb3ac0429c92b1db Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:49:16 -0400 Subject: [PATCH 02/24] handle known broken tests in process_functional_tests_result.py --- docker/test/util/process_functional_tests_result.py | 10 ++++++++++ tests/ci/functional_test_check.py | 1 + 2 files changed, 11 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index fd4cc9f4bf76..30cba0d6690d 100755 --- a/docker/test/util/process_functional_tests_result.py +++ 
b/docker/test/util/process_functional_tests_result.py @@ -4,6 +4,7 @@ import logging import argparse import csv +import json OK_SIGN = "[ OK " FAIL_SIGN = "[ FAIL " @@ -206,6 +207,7 @@ def write_results(results_file, status_file, results, status): parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") parser.add_argument("--broken-tests", default="/analyzer_tech_debt.txt") + parser.add_argument("--broken-tests-json", default="/broken_tests.json") args = parser.parse_args() broken_tests = list() @@ -213,6 +215,14 @@ def write_results(results_file, status_file, results, status): logging.info(f"File {args.broken_tests} with broken tests found") with open(args.broken_tests) as f: broken_tests = f.read().splitlines() + + if os.path.exists(args.broken_tests_json): + logging.info(f"File {args.broken_tests_json} with broken tests found") + + with open(args.broken_tests_json) as f: + broken_tests.extend(json.load(f).keys()) + + if broken_tests: logging.info(f"Broken tests in the list: {len(broken_tests)}") state, description, test_results = process_result(args.in_results_dir, broken_tests) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 8f1ffb05ac3c..14c07665642e 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -96,6 +96,7 @@ def get_run_command( f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " if "analyzer" not in check_name else "" + f"--volume={repo_path}/tests/broken_tests.json:/broken_tests.json " ) return ( From 08463a4bc2fd513e5ddd1c9cd6d670883755feb5 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:53:04 -0400 Subject: [PATCH 03/24] remove broken test handling from report.py --- tests/ci/report.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 4bc92014c27f..a3c9b53637a9 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH, ROOT_DIR +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -325,19 +325,6 @@ def path_converter(obj): def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults - - broken_tests_config_path = f"{ROOT_DIR}/tests/broken_tests.json" - if ( - os.path.isfile(broken_tests_config_path) - and os.path.getsize(broken_tests_config_path) > 0 - ): - with open(broken_tests_config_path, "r", encoding="utf-8") as broken_tests_file: - broken_tests = json.load( - broken_tests_file - ) # type: Dict[str, Dict[str, str]] - else: - broken_tests = {} - with open(results_path, "r", encoding="utf-8") as descriptor: reader = csv.reader(descriptor, delimiter="\t") for line in reader: @@ -364,18 +351,6 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes else: result.set_log_files(line[3]) - if name in broken_tests.keys() and status == "FAIL": - fail_message = broken_tests[name].get("message", "") - if result.log_files and fail_message: - for log_path in result.log_files: - if log_path.endswith(".log"): - with open(log_path) as log_file: - if fail_message in log_file.read(): - result.status = "XFAIL" - break - 
else: - result.status = "XFAIL" - results.append(result) return results From e0738a541a683b1dff132e0dce5c3ef76e270180 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:09:33 -0400 Subject: [PATCH 04/24] add broken test handling back to integration_tests_runner.py --- tests/ci/integration_tests_runner.py | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f10c0a190e2f..18725a509095 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -474,6 +474,19 @@ def _get_parallel_tests_skip_list(repo_path): skip_list_tests = json.load(skip_list_file) return list(sorted(skip_list_tests)) + @staticmethod + def _get_broken_tests_list(repo_path: str) -> dict: + skip_list_file_path = f"{repo_path}/tests/broken_tests.json" + if ( + not os.path.isfile(skip_list_file_path) + or os.path.getsize(skip_list_file_path) == 0 + ): + return {} + + with open(skip_list_file_path, "r", encoding="utf-8") as skip_list_file: + skip_list_tests = json.load(skip_list_file) + return skip_list_tests + @staticmethod def group_test_by_file(tests): result = {} # type: Dict @@ -891,6 +904,8 @@ def run_impl(self, repo_path, build_path): " ".join(not_found_tests[:3]), ) + known_broken_tests = self._get_broken_tests_list(repo_path) + grouped_tests = self.group_test_by_file(filtered_sequential_tests) i = 0 for par_group in chunks(filtered_parallel_tests, PARALLEL_GROUP_SIZE): @@ -921,6 +936,26 @@ def run_impl(self, repo_path, build_path): group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS ) + + for fail_status in ("ERROR", "FAILED"): + for failed_test in group_counters[fail_status]: + if failed_test in known_broken_tests.keys(): + fail_message = known_broken_tests[failed_test].get("message") + if not fail_message: + mark_as_broken = True + else: + mark_as_broken = False + for log_path in log_paths: + if log_path.endswith(".log"): + with open(log_path) as log_file: + if fail_message in log_file.read(): + mark_as_broken = True + break + + if mark_as_broken: + group_counters[fail_status].remove(failed_test) + group_counters["BROKEN"].append(failed_test) + total_tests = 0 for counter, value in group_counters.items(): logging.info( From d4de139b828e85d34fe649f4a93438a07aeb033d Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:24:06 -0400 Subject: [PATCH 05/24] update broken_tests.json --- tests/broken_tests.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 28ffc977d28b..2af5168381b0 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -1,11 +1,11 @@ { - "test_replicated_merge_tree_replicated_db_ttl/test.py::test_replicated_db_and_ttl": { - "message": "DB::Exception: Replicated is an experimental database engine.", - "reason": "Will not work without allow_experimental_database_replicated=1" + "test_postgresql_replica_database_engine_2/test.py::test_quoting_publication": { + "message": "DB::Exception: Syntax error:", + "reason": "syntax error" }, - "test_storage_s3_queue/test.py::test_upgrade": { - "message": "DB::Exception: S3Queue is experimental.", - "reason": "Will not work without allow_experimental_s3queue=1" + 
"test_distributed_inter_server_secret/test.py::test_secure_cluster_distributed_over_distributed_different_users": { + "message": "DB::NetException: Connection reset by peer, while reading from socket", + "reason": "network issue" }, "02920_alter_column_of_projections": { "reason": "requires different settings" From 293ea8b110e76ca69e756c9ad755f3f78c7196cd Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:12:32 -0500 Subject: [PATCH 06/24] update broken_tests.json --- tests/broken_tests.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 2af5168381b0..412cc3a4ede4 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -12,5 +12,11 @@ }, "02888_system_tables_with_inaccsessible_table_function": { "reason": "todo investigate" + }, + "03094_grouparraysorted_memory": { + "reason": "fails with tsan" + }, + "02700_s3_part_INT_MAX": { + "reason": "fails with asan" } } \ No newline at end of file From 7ed2719730e16551bf2150bc697fd8cf0e582cc1 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Thu, 14 Nov 2024 11:26:08 -0500 Subject: [PATCH 07/24] update hdfs image --- tests/integration/compose/docker_compose_hdfs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/compose/docker_compose_hdfs.yml b/tests/integration/compose/docker_compose_hdfs.yml index 1cae54ad9e1a..40a10df01f7c 100644 --- a/tests/integration/compose/docker_compose_hdfs.yml +++ b/tests/integration/compose/docker_compose_hdfs.yml @@ -1,7 +1,7 @@ version: '2.3' services: hdfs1: - image: sequenceiq/hadoop-docker:2.7.0 + image: prasanthj/docker-hadoop:2.6.0 hostname: hdfs1 restart: always expose: From 7fa57c3f05fd3417a1cc80afecde5f5cdc83b819 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:25:25 -0500 Subject: [PATCH 08/24] fix stateless test crossout --- tests/broken_tests.json | 2 +- tests/ci/functional_test_check.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index 412cc3a4ede4..b0d374916c12 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -14,7 +14,7 @@ "reason": "todo investigate" }, "03094_grouparraysorted_memory": { - "reason": "fails with tsan" + "reason": "flaky" }, "02700_s3_part_INT_MAX": { "reason": "fails with asan" diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 14c07665642e..4cd022c6bf81 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -96,6 +96,8 @@ def get_run_command( f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " if "analyzer" not in check_name else "" + ) + volume_with_broken_test += ( f"--volume={repo_path}/tests/broken_tests.json:/broken_tests.json " ) From ee493a9fc32e77013c9d81859e1b5105959fa8b3 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 14:23:15 -0500 Subject: [PATCH 09/24] update broken tests list with upstream fails --- tests/broken_tests.json | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index b0d374916c12..a6da072c8b86 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -18,5 +18,41 @@ }, "02700_s3_part_INT_MAX": { "reason": "fails with asan" + }, + 
"test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000]":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000] ":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000]":{ + "reason": "not run by upstream" + }, + "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000]":{ + "reason": "not run by upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True] ":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[direct-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[flat-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[hashed-True]":{ + "reason": "errors upstream" + }, + "test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py::test_simple_ssl[flat-True]":{ + "reason": "errors upstream" + }, + "test_storage_mongodb/test.py::test_secure_connection[True]":{ + "reason": "fails upstream" + }, + "test_table_function_mongodb/test.py::test_secure_connection[True]":{ + "reason": "fails upstream" } -} \ No newline at end of file +} From 3b136bcdd5164633151c701fb7c3c8da28bb4ae8 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 14:42:50 -0500 Subject: [PATCH 10/24] Adding automated report --- .github/create_combined_ci_report.py | 294 +++++++++++++++++++++++++ .github/workflows/release_branches.yml | 28 +++ 2 files changed, 322 insertions(+) create mode 100755 .github/create_combined_ci_report.py diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py new file mode 100755 index 000000000000..4d90171b03d4 --- /dev/null +++ b/.github/create_combined_ci_report.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from itertools import combinations +import json + +import requests +from clickhouse_driver import Client +import boto3 +from botocore.exceptions import NoCredentialsError + +DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" +DATABASE_USER_VAR = "CHECKS_DATABASE_USER" +DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" +S3_BUCKET = "altinity-build-artifacts" + + +def get_checks_fails(client: Client, job_url: str): + """ + Get tests that did not succeed for the given job URL. + Exclude checks that have status 'error' as they are counted in get_checks_errors. + """ + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status IN ('FAIL', 'ERROR') + AND check_status!='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): + """ + Get tests that are known to fail for the given job URL. 
+ """ + assert len(known_fails) > 0, "cannot query the database with empty known fails" + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY test_name, check_name + """ + + df = client.query_dataframe(query) + + df.insert( + len(df.columns) - 1, + "reason", + df["test_name"] + .cat.remove_unused_categories() + .apply( + lambda test_name: known_fails[test_name].get("reason", "No reason given") + ), + ) + + return df + + +def get_checks_errors(client: Client, job_url: str): + """ + Get checks that have status 'error' for the given job URL. + """ + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND check_status=='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def drop_prefix_rows(df, column_to_clean): + """ + Drop rows from the dataframe if: + - the row matches another row completely except for the specified column + - the specified column of that row is a prefix of the same column in another row + """ + to_drop = set() + reference_columns = [col for col in df.columns if col != column_to_clean] + for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): + if all(row_1[col] == row_2[col] for col in reference_columns): + if row_2[column_to_clean].startswith(row_1[column_to_clean]): + to_drop.add(i) + elif row_1[column_to_clean].startswith(row_2[column_to_clean]): + to_drop.add(j) + return df.drop(to_drop) + + +def get_regression_fails(client: Client, job_url: str): + """ + Get regression tests that did not succeed for the given job URL. + """ + # If you rename the alias for report_url, also update the formatters in format_results_as_html_table + # Nested SELECT handles test reruns + query = f"""SELECT arch, job_name, status, test_name, results_link + FROM ( + SELECT + architecture as arch, + test_name, + argMax(result, start_time) AS status, + job_url, + job_name, + report_url as results_link + FROM `gh-data`.clickhouse_regression_results + GROUP BY architecture, test_name, job_url, job_name, report_url, start_time + ORDER BY start_time DESC, length(test_name) DESC + ) + WHERE job_url='{job_url}' + AND status IN ('Fail', 'Error') + """ + df = client.query_dataframe(query) + df = drop_prefix_rows(df, "test_name") + df["job_name"] = df["job_name"].str.title() + return df + + +def url_to_html_link(url: str) -> str: + if not url: + return "" + text = url.split("/")[-1] + if not text: + text = "results" + return f'{text}' + + +def format_test_name_for_linewrap(text: str) -> str: + """Tweak the test name to improve line wrapping.""" + return text.replace(".py::", "/") + + +def format_results_as_html_table(results) -> str: + if len(results) == 0: + return "

<p>Nothing to report</p>
" + results.columns = [col.replace("_", " ").title() for col in results.columns] + html = ( + results.to_html( + index=False, + formatters={ + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + }, + escape=False, + ) # tbody/thead tags interfere with the table sorting script + .replace("\n", "") + .replace("\n", "") + .replace("\n", "") + .replace("\n", "") + .replace(' argparse.Namespace: + parser = argparse.ArgumentParser(description="Create a combined CI report.") + parser.add_argument( + "--actions-run-url", required=True, help="URL of the actions run" + ) + parser.add_argument( + "--pr-number", required=True, help="Pull request number for the S3 path" + ) + parser.add_argument( + "--commit-sha", required=True, help="Commit SHA for the S3 path" + ) + parser.add_argument( + "--no-upload", action="store_true", help="Do not upload the report" + ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) + parser.add_argument( + "--mark-preview", action="store_true", help="Mark the report as a preview" + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + db_client = Client( + host=os.getenv(DATABASE_HOST_VAR), + user=os.getenv(DATABASE_USER_VAR), + password=os.getenv(DATABASE_PASSWORD_VAR), + port=9440, + secure="y", + verify=False, + settings={"use_numpy": True}, + ) + + s3_path = ( + f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/" + ) + report_destination_url = s3_path + "combined_report.html" + ci_running_report_url = s3_path + "ci_running.html" + + response = requests.get(ci_running_report_url) + if response.status_code == 200: + ci_running_report: str = response.text + else: + print( + f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}" + ) + exit(1) + + fail_results = { + "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], + "checks_errors": get_checks_errors(db_client, args.actions_run_url), + "regression_fails": get_regression_fails(db_client, args.actions_run_url), + } + + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + + combined_report = ( + ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for") + .replace( + "
", + f"""

<h2>Table of Contents</h2>
+{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
+
+
+<h2>CI Jobs Status</h2>
+
""", + 1, + ) + .replace( + "
", + f""" + +

<h2>Checks Errors</h2>
+{format_results_as_html_table(fail_results['checks_errors'])}
+
+<h2>Checks New Fails</h2>
+{format_results_as_html_table(fail_results['checks_fails'])}
+
+<h2>Regression New Fails</h2>
+{format_results_as_html_table(fail_results['regression_fails'])}
+
+<h2>Checks Known Fails</h2>
+{format_results_as_html_table(fail_results['checks_known_fails'])} +""", + 1, + ) + ) + report_path = Path("combined_report.html") + report_path.write_text(combined_report, encoding="utf-8") + + if args.no_upload: + print(f"Report saved to {report_path}") + exit(0) + + # Upload the report to S3 + s3_client = boto3.client("s3") + + try: + s3_client.put_object( + Bucket=S3_BUCKET, + Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html", + Body=combined_report, + ContentType="text/html; charset=utf-8", + ) + except NoCredentialsError: + print("Credentials not available for S3 upload.") + + print(report_destination_url) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index b2495ac0f814..ee26102405a7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -549,3 +549,31 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + - name: Create and upload combined report + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') + if [[ -n $IS_VALID_URL ]]; then + echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + else + echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY + exit 1 + fi if: ${{ !cancelled() }} + env: + CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} + CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} + CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} + COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.number }} + ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} + shell: bash + run: | + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 + + REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) + + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') + if [[ -n $IS_VALID_URL ]]; then + echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + else + echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY + exit 1 + fi \ No newline at end of file From 23e18745c1877e0a0221ceaa882c45f97e2acf52 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:19:53 -0500 Subject: [PATCH 11/24] fix workflow error --- .github/workflows/release_branches.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index ee26102405a7..ccba3503b3e7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -550,13 +550,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py - name: Create and upload combined report - IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') - if [[ -n $IS_VALID_URL ]]; then - echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY - else - echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY - exit 1 - fi if: ${{ !cancelled() }} env: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} From eb4a33201d6b071b034756fa7a24b2f85e469c71 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Tue, 4 Mar 2025 08:11:44 -0500 Subject: [PATCH 12/24] Always run finishcheck --- 
.github/workflows/release_branches.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index ccba3503b3e7..6b19cab0cfdf 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -506,7 +506,7 @@ jobs: runner_type: altinity-style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: - DockerServerImage - DockerKeeperImage From 336b35be8681e465a4ff28d2c3e19f9131879e78 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Tue, 4 Mar 2025 11:55:53 -0500 Subject: [PATCH 13/24] fix extra space in broken test name --- tests/broken_tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/broken_tests.json b/tests/broken_tests.json index a6da072c8b86..d9d0008ef88f 100644 --- a/tests/broken_tests.json +++ b/tests/broken_tests.json @@ -34,7 +34,7 @@ "test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000]":{ "reason": "not run by upstream" }, - "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True] ":{ + "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[cache-True]": { "reason": "errors upstream" }, "test_dictionaries_all_layouts_separate_sources/test_mongo.py::test_simple_ssl[direct-True]":{ From 16f0806b759521623ee280de06ec2ab4c71f48d0 Mon Sep 17 00:00:00 2001 From: Stuart <146047128+strtgbb@users.noreply.github.com> Date: Wed, 5 Mar 2025 08:57:56 -0500 Subject: [PATCH 14/24] fix find and replace --- .github/create_combined_ci_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py index 4d90171b03d4..19c7a6c05472 100755 --- a/.github/create_combined_ci_report.py +++ b/.github/create_combined_ci_report.py @@ -231,7 +231,7 @@ def main(): ) combined_report = ( - ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for") + ci_running_report.replace("ClickHouse CI Running for", "Combined CI Report for") .replace( "", f"""

<h2>Table of Contents</h2>
From e0b41afa70284edbb78856b6928146cad3a7a2f6 Mon Sep 17 00:00:00 2001 From: Your Name <146047128+strtgbb@users.noreply.github.com> Date: Thu, 13 Mar 2025 22:33:58 -0400 Subject: [PATCH 15/24] update regression hash --- .github/workflows/release_branches.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6b19cab0cfdf..f6c3621831ba 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -472,7 +472,7 @@ jobs: secrets: inherit with: runner_type: altinity-type-cpx51, altinity-image-x86-app-docker-ce, altinity-setup-regression - commit: 53d73ed32155a8a17ee0d0cdb15aee96c98010a2 + commit: 0fdb555b36d0ea6a6affc5cf87e593b5d8944c0a arch: release build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout_minutes: 300 @@ -483,7 +483,7 @@ jobs: secrets: inherit with: runner_type: altinity-type-cax41, altinity-image-arm-app-docker-ce, altinity-setup-regression - commit: 53d73ed32155a8a17ee0d0cdb15aee96c98010a2 + commit: 0fdb555b36d0ea6a6affc5cf87e593b5d8944c0a arch: aarch64 build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout_minutes: 300 From c6828583992efff1097aafe3c864f44eb6909d60 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:11:51 -0400 Subject: [PATCH 16/24] add grype scan --- .github/workflows/release_branches.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e8540026371d..e962e1391ce7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -168,6 +168,23 @@ jobs: test_name: Docker keeper image runner_type: altinity-func-tester data: ${{ needs.RunConfig.outputs.data }} + GrypeScan: + needs: [RunConfig, DockerServerImage, DockerKeeperImage] + if: ${{ !failure() && !cancelled() }} + strategy: + fail-fast: false + matrix: + include: + - image: server + suffix: '' + - image: server + suffix: '-alpine' + - image: keeper + suffix: '' + uses: ./.github/workflows/grype_scan.yml@antalya + secrets: inherit + with: + docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ @@ -555,7 +572,7 @@ jobs: CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - PR_NUMBER: ${{ github.event.number }} + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | From 45779003d7141ad171a611ea39ab74adfcf31aec Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:20:05 -0400 Subject: [PATCH 17/24] update report to new format --- .github/create_combined_ci_report.py | 294 ------------ 
.github/create_workflow_report.py | 631 +++++++++++++++++++++++++ .github/workflows/release_branches.yml | 4 +- 3 files changed, 633 insertions(+), 296 deletions(-) delete mode 100755 .github/create_combined_ci_report.py create mode 100755 .github/create_workflow_report.py diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py deleted file mode 100755 index 19c7a6c05472..000000000000 --- a/.github/create_combined_ci_report.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -from pathlib import Path -from itertools import combinations -import json - -import requests -from clickhouse_driver import Client -import boto3 -from botocore.exceptions import NoCredentialsError - -DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" -DATABASE_USER_VAR = "CHECKS_DATABASE_USER" -DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" -S3_BUCKET = "altinity-build-artifacts" - - -def get_checks_fails(client: Client, job_url: str): - """ - Get tests that did not succeed for the given job URL. - Exclude checks that have status 'error' as they are counted in get_checks_errors. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status IN ('FAIL', 'ERROR') - AND check_status!='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): - """ - Get tests that are known to fail for the given job URL. - """ - assert len(known_fails) > 0, "cannot query the database with empty known fails" - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status='BROKEN' - AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) - ORDER BY test_name, check_name - """ - - df = client.query_dataframe(query) - - df.insert( - len(df.columns) - 1, - "reason", - df["test_name"] - .cat.remove_unused_categories() - .apply( - lambda test_name: known_fails[test_name].get("reason", "No reason given") - ), - ) - - return df - - -def get_checks_errors(client: Client, job_url: str): - """ - Get checks that have status 'error' for the given job URL. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND check_status=='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def drop_prefix_rows(df, column_to_clean): - """ - Drop rows from the dataframe if: - - the row matches another row completely except for the specified column - - the specified column of that row is a prefix of the same column in another row - """ - to_drop = set() - reference_columns = [col for col in df.columns if col != column_to_clean] - for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): - if all(row_1[col] == row_2[col] for col in reference_columns): - if row_2[column_to_clean].startswith(row_1[column_to_clean]): - to_drop.add(i) - elif row_1[column_to_clean].startswith(row_2[column_to_clean]): - to_drop.add(j) - return df.drop(to_drop) - - -def get_regression_fails(client: Client, job_url: str): - """ - Get regression tests that did not succeed for the given job URL. 
- """ - # If you rename the alias for report_url, also update the formatters in format_results_as_html_table - # Nested SELECT handles test reruns - query = f"""SELECT arch, job_name, status, test_name, results_link - FROM ( - SELECT - architecture as arch, - test_name, - argMax(result, start_time) AS status, - job_url, - job_name, - report_url as results_link - FROM `gh-data`.clickhouse_regression_results - GROUP BY architecture, test_name, job_url, job_name, report_url, start_time - ORDER BY start_time DESC, length(test_name) DESC - ) - WHERE job_url='{job_url}' - AND status IN ('Fail', 'Error') - """ - df = client.query_dataframe(query) - df = drop_prefix_rows(df, "test_name") - df["job_name"] = df["job_name"].str.title() - return df - - -def url_to_html_link(url: str) -> str: - if not url: - return "" - text = url.split("/")[-1] - if not text: - text = "results" - return f'{text}' - - -def format_test_name_for_linewrap(text: str) -> str: - """Tweak the test name to improve line wrapping.""" - return text.replace(".py::", "/") - - -def format_results_as_html_table(results) -> str: - if len(results) == 0: - return "

<p>Nothing to report</p>
" - results.columns = [col.replace("_", " ").title() for col in results.columns] - html = ( - results.to_html( - index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - }, - escape=False, - ) # tbody/thead tags interfere with the table sorting script - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace('
argparse.Namespace: - parser = argparse.ArgumentParser(description="Create a combined CI report.") - parser.add_argument( - "--actions-run-url", required=True, help="URL of the actions run" - ) - parser.add_argument( - "--pr-number", required=True, help="Pull request number for the S3 path" - ) - parser.add_argument( - "--commit-sha", required=True, help="Commit SHA for the S3 path" - ) - parser.add_argument( - "--no-upload", action="store_true", help="Do not upload the report" - ) - parser.add_argument( - "--known-fails", type=str, help="Path to the file with known fails" - ) - parser.add_argument( - "--mark-preview", action="store_true", help="Mark the report as a preview" - ) - return parser.parse_args() - - -def main(): - args = parse_args() - - db_client = Client( - host=os.getenv(DATABASE_HOST_VAR), - user=os.getenv(DATABASE_USER_VAR), - password=os.getenv(DATABASE_PASSWORD_VAR), - port=9440, - secure="y", - verify=False, - settings={"use_numpy": True}, - ) - - s3_path = ( - f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/" - ) - report_destination_url = s3_path + "combined_report.html" - ci_running_report_url = s3_path + "ci_running.html" - - response = requests.get(ci_running_report_url) - if response.status_code == 200: - ci_running_report: str = response.text - else: - print( - f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}" - ) - exit(1) - - fail_results = { - "checks_fails": get_checks_fails(db_client, args.actions_run_url), - "checks_known_fails": [], - "checks_errors": get_checks_errors(db_client, args.actions_run_url), - "regression_fails": get_regression_fails(db_client, args.actions_run_url), - } - - if args.known_fails: - if not os.path.exists(args.known_fails): - print(f"Known fails file {args.known_fails} not found.") - exit(1) - - with open(args.known_fails) as f: - known_fails = json.load(f) - - if known_fails: - fail_results["checks_known_fails"] = get_checks_known_fails( - db_client, args.actions_run_url, known_fails - ) - - combined_report = ( - ci_running_report.replace("ClickHouse CI Running for", "Combined CI Report for") - .replace( - "
", - f"""

<h2>Table of Contents</h2>
-{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
-
-
-<h2>CI Jobs Status</h2>
-
""", - 1, - ) - .replace( - "
", - f""" - -

<h2>Checks Errors</h2>
-{format_results_as_html_table(fail_results['checks_errors'])}
-
-<h2>Checks New Fails</h2>
-{format_results_as_html_table(fail_results['checks_fails'])}
-
-<h2>Regression New Fails</h2>
-{format_results_as_html_table(fail_results['regression_fails'])}
-
-<h2>Checks Known Fails</h2>
-{format_results_as_html_table(fail_results['checks_known_fails'])} -""", - 1, - ) - ) - report_path = Path("combined_report.html") - report_path.write_text(combined_report, encoding="utf-8") - - if args.no_upload: - print(f"Report saved to {report_path}") - exit(0) - - # Upload the report to S3 - s3_client = boto3.client("s3") - - try: - s3_client.put_object( - Bucket=S3_BUCKET, - Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html", - Body=combined_report, - ContentType="text/html; charset=utf-8", - ) - except NoCredentialsError: - print("Credentials not available for S3 upload.") - - print(report_destination_url) - - -if __name__ == "__main__": - main() diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py new file mode 100755 index 000000000000..4257925b826d --- /dev/null +++ b/.github/create_workflow_report.py @@ -0,0 +1,631 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from itertools import combinations +import json +from datetime import datetime + +import requests +import pandas as pd +from clickhouse_driver import Client +import boto3 +from botocore.exceptions import NoCredentialsError +import pandas as pd + +DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" +DATABASE_USER_VAR = "CHECKS_DATABASE_USER" +DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" +S3_BUCKET = "altinity-build-artifacts" + + +css = """ + /* Base colors for Altinity */ + :root { + --altinity-background: #000D45; + --altinity-accent: #189DCF; + --altinity-highlight: #FFC600; + --altinity-gray: #6c757d; + --altinity-light-gray: #f8f9fa; + --altinity-white: #ffffff; + } + + /* Body and heading fonts */ + body { + font-family: Arimo, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 1rem; + background-color: var(--altinity-background); + color: var(--altinity-light-gray); + padding: 2rem; + } + + h1, h2, h3, h4, h5, h6 { + font-family: Figtree, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; + color: var(--altinity-white); + } + + .logo { + width: auto; + height: 5em; + } + + /* General table styling */ + table { + min-width: min(900px, 98vw); + margin: 1rem 0; + border-collapse: collapse; + background-color: var(--altinity-white); + border: 1px solid var(--altinity-accent); + box-shadow: 0 0 8px rgba(0, 0, 0, 0.05); + color: var(--altinity-background); + } + + /* Table header styling */ + th { + background-color: var(--altinity-accent); + color: var(--altinity-white); + padding: 10px 16px; + text-align: left; + border: none; + border-bottom: 2px solid var(--altinity-background); + white-space: nowrap; + } + th.hth { + border-bottom: 1px solid var(--altinity-accent); + border-right: 2px solid var(--altinity-background); + } + + /* Table header sorting styling */ + th { + cursor: pointer; + } + th.no-sort { + pointer-events: none; + } + th::after, + th::before { + transition: color 0.2s ease-in-out; + font-size: 1.2em; + color: transparent; + } + th::after { + margin-left: 3px; + content: '\\025B8'; + } + th:hover::after { + color: inherit; + } + th.dir-d::after { + color: inherit; + content: '\\025BE'; + } + th.dir-u::after { + color: inherit; + content: '\\025B4'; + } + + /* Table body row styling */ + tr:hover { + background-color: var(--altinity-light-gray); + } + + /* Table cell styling */ + td { + padding: 8px 8px; + border: 1px solid var(--altinity-accent); + } + + /* Link styling */ + a { + color: var(--altinity-accent); + text-decoration: none; + } + a:hover { + color: var(--altinity-highlight); + 
text-decoration: underline; + } +""" + +script = """ + +""" + +logo = """ +

+""" + + +def get_commit_statuses(sha: str) -> pd.DataFrame: + """ + Fetch commit statuses for a given SHA and return as a pandas DataFrame. + + Args: + sha (str): Commit SHA to fetch statuses for. + + Returns: + pd.DataFrame: DataFrame containing all statuses. + """ + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/Altinity/ClickHouse/commits/{sha}/statuses" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch statuses: {response.status_code} {response.text}" + ) + + data = response.json() + + # Parse relevant fields + parsed = [ + { + "job_name": item["context"], + "job_status": item["state"], + "message": item["description"], + "results_link": item["target_url"], + } + for item in data + ] + + return ( + pd.DataFrame(parsed) + .sort_values(by=["job_status", "job_name"], ascending=[True, True]) + .reset_index(drop=True) + ) + + +def get_pr_info_from_number(pr_number: str) -> dict: + """ + Fetch pull request information for a given PR number. + + Args: + pr_number (str): Pull request number to fetch information for. + + Returns: + dict: Dictionary containing PR information. + """ + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/Altinity/ClickHouse/pulls/{pr_number}" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch pull request info: {response.status_code} {response.text}" + ) + + return response.json() + + +def get_checks_fails(client: Client, job_url: str): + """ + Get tests that did not succeed for the given job URL. + Exclude checks that have status 'error' as they are counted in get_checks_errors. + """ + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND test_status IN ('FAIL', 'ERROR') + AND check_status!='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): + """ + Get tests that are known to fail for the given job URL. + """ + assert len(known_fails) > 0, "cannot query the database with empty known fails" + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY test_name, check_name + """ + + df = client.query_dataframe(query) + + df.insert( + len(df.columns) - 1, + "reason", + df["test_name"] + .astype(str) + .apply( + lambda test_name: known_fails[test_name].get("reason", "No reason given") + ), + ) + + return df + + +def get_checks_errors(client: Client, job_url: str): + """ + Get checks that have status 'error' for the given job URL. 
+ """ + columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url LIKE '{job_url}%' + AND check_status=='error' + ORDER BY check_name, test_name + """ + return client.query_dataframe(query) + + +def drop_prefix_rows(df, column_to_clean): + """ + Drop rows from the dataframe if: + - the row matches another row completely except for the specified column + - the specified column of that row is a prefix of the same column in another row + """ + to_drop = set() + reference_columns = [col for col in df.columns if col != column_to_clean] + for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): + if all(row_1[col] == row_2[col] for col in reference_columns): + if row_2[column_to_clean].startswith(row_1[column_to_clean]): + to_drop.add(i) + elif row_1[column_to_clean].startswith(row_2[column_to_clean]): + to_drop.add(j) + return df.drop(to_drop) + + +def get_regression_fails(client: Client, job_url: str): + """ + Get regression tests that did not succeed for the given job URL. + """ + # If you rename the alias for report_url, also update the formatters in format_results_as_html_table + # Nested SELECT handles test reruns + query = f"""SELECT arch, job_name, status, test_name, results_link + FROM ( + SELECT + architecture as arch, + test_name, + argMax(result, start_time) AS status, + job_url, + job_name, + report_url as results_link + FROM `gh-data`.clickhouse_regression_results + GROUP BY architecture, test_name, job_url, job_name, report_url + ORDER BY length(test_name) DESC + ) + WHERE job_url='{job_url}' + AND status IN ('Fail', 'Error') + """ + df = client.query_dataframe(query) + df = drop_prefix_rows(df, "test_name") + df["job_name"] = df["job_name"].str.title() + return df + + +def get_cves(pr_number, commit_sha): + s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) + s3_prefix = f"{pr_number}/{commit_sha}/grype/" + + results = [] + + response = s3_client.list_objects_v2( + Bucket=S3_BUCKET, Prefix=s3_prefix, Delimiter="/" + ) + grype_result_dirs = [ + content["Prefix"] for content in response.get("CommonPrefixes", []) + ] + + for path in grype_result_dirs: + file_key = f"{path}result.json" + file_response = s3_client.get_object(Bucket=S3_BUCKET, Key=file_key) + content = file_response["Body"].read().decode("utf-8") + results.append(json.loads(content)) + + rows = [] + for scan_result in results: + for match in scan_result["matches"]: + rows.append( + { + "docker_image": scan_result["source"]["target"]["userInput"], + "severity": match["vulnerability"]["severity"], + "identifier": match["vulnerability"]["id"], + "namespace": match["vulnerability"]["namespace"], + } + ) + + if len(rows) == 0: + return pd.DataFrame() + + df = pd.DataFrame(rows).drop_duplicates() + df = df.sort_values( + by="severity", + key=lambda col: col.str.lower().map( + {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} + ), + ) + return df + + +def url_to_html_link(url: str) -> str: + if not url: + return "" + text = url.split("/")[-1] + if not text: + text = "results" + return f'{text}' + + +def format_test_name_for_linewrap(text: str) -> str: + """Tweak the test name to improve line wrapping.""" + return text.replace(".py::", "/") + + +def format_test_status(text: str) -> str: + """Format the test status for better readability.""" + color = ( + "red" + if text.lower().startswith("fail") + else "orange" if text.lower() in ("error", "broken") else "green" + ) 
+    return f'<span style="color: {color}">{text}</span>'
+
+
+def format_results_as_html_table(results) -> str:
+    if len(results) == 0:
+        return "<p>Nothing to report</p>
" + results.columns = [col.replace("_", " ").title() for col in results.columns] + html = results.to_html( + index=False, + formatters={ + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + }, + escape=False, + ).replace(' border="1"', "") + return html + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Create a combined CI report.") + parser.add_argument( + "--actions-run-url", required=True, help="URL of the actions run" + ) + parser.add_argument( + "--pr-number", required=True, help="Pull request number for the S3 path" + ) + parser.add_argument( + "--commit-sha", required=True, help="Commit SHA for the S3 path" + ) + parser.add_argument( + "--no-upload", action="store_true", help="Do not upload the report" + ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) + parser.add_argument( + "--cves", action="store_true", help="Get CVEs from Grype results" + ) + parser.add_argument( + "--mark-preview", action="store_true", help="Mark the report as a preview" + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + db_client = Client( + host=os.getenv(DATABASE_HOST_VAR), + user=os.getenv(DATABASE_USER_VAR), + password=os.getenv(DATABASE_PASSWORD_VAR), + port=9440, + secure="y", + verify=False, + settings={"use_numpy": True}, + ) + + fail_results = { + "job_statuses": get_commit_statuses(args.commit_sha), + "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], + "checks_errors": get_checks_errors(db_client, args.actions_run_url), + "regression_fails": get_regression_fails(db_client, args.actions_run_url), + "docker_images_cves": ( + [] if not args.cves else get_cves(args.pr_number, args.commit_sha) + ), + } + + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + + if args.pr_number == "0": + pr_info_html = "Release" + else: + try: + pr_info = get_pr_info_from_number(args.pr_number) + pr_info_html = f""" + #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} + """ + except Exception as e: + pr_info_html = e + + high_cve_count = 0 + if len(fail_results["docker_images_cves"]) > 0: + high_cve_count = ( + fail_results["docker_images_cves"]["severity"] + .str.lower() + .isin(("high", "critical")) + .sum() + ) + + title = "ClickHouse® CI Workflow Run Report" + + html_report = f""" + + + + + + + {title} + + + {logo} +

{title}

+ + + + + + + + + + + + + +
Pull Request{pr_info_html}
Workflow Run{args.actions_run_url.split('/')[-1]}
Commit{args.commit_sha}
Date{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC
+ +

<h2>Table of Contents</h2>
+{'<p>This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
+
+
+<h2>CI Jobs Status</h2>
+{format_results_as_html_table(fail_results['job_statuses'])}
+
+<h2>Checks Errors</h2>
+{format_results_as_html_table(fail_results['checks_errors'])}
+
+<h2>Checks New Fails</h2>
+{format_results_as_html_table(fail_results['checks_fails'])}
+
+<h2>Regression New Fails</h2>
+{format_results_as_html_table(fail_results['regression_fails'])}
+
+<h2>Docker Images CVEs</h2>
+{"<p>Not Checked</p>" if not args.cves else format_results_as_html_table(fail_results['docker_images_cves'])}
+
+<h2>Checks Known Fails</h2>
+{"<p>Not Checked</p>
" if not args.known_fails else format_results_as_html_table(fail_results['checks_known_fails'])} + +{script} + + +""" + report_name = "ci_run_report.html" + report_path = Path(report_name) + report_path.write_text(html_report, encoding="utf-8") + + if args.no_upload: + print(f"Report saved to {report_path}") + exit(0) + + report_destination_key = f"{args.pr_number}/{args.commit_sha}/{report_name}" + + # Upload the report to S3 + s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) + + try: + s3_client.put_object( + Bucket=S3_BUCKET, + Key=report_destination_key, + Body=html_report, + ContentType="text/html; charset=utf-8", + ) + except NoCredentialsError: + print("Credentials not available for S3 upload.") + + print(f"https://s3.amazonaws.com/{S3_BUCKET}/" + report_destination_key) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e962e1391ce7..2a2eeecc0fad 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -576,9 +576,9 @@ jobs: ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | - pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 - REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) + REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then From c12ca0ed5f58c2e4aec99ffb4f1fc0e1e5fda8bd Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:25:36 -0400 Subject: [PATCH 18/24] fix 'cannot specify version when calling local workflows' --- .github/grype/parse_vulnerabilities_grype.py | 32 +++++ .github/grype/run_grype_scan.sh | 18 +++ .../grype/transform_and_upload_results_s3.sh | 13 ++ .github/workflows/grype_scan.yml | 131 ++++++++++++++++++ .github/workflows/release_branches.yml | 2 +- 5 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 .github/grype/parse_vulnerabilities_grype.py create mode 100755 .github/grype/run_grype_scan.sh create mode 100755 .github/grype/transform_and_upload_results_s3.sh create mode 100644 .github/workflows/grype_scan.yml diff --git a/.github/grype/parse_vulnerabilities_grype.py b/.github/grype/parse_vulnerabilities_grype.py new file mode 100644 index 000000000000..fec2ef3bfac7 --- /dev/null +++ b/.github/grype/parse_vulnerabilities_grype.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +import json + +from testflows.core import * + +xfails = {} + + +@Name("docker vulnerabilities") +@XFails(xfails) +@TestModule +def docker_vulnerabilities(self): + with Given("I gather grype scan results"): + with open("./result.json", "r") as f: + results = json.load(f) + + for vulnerability in results["matches"]: + with Test( + f"{vulnerability['vulnerability']['id']}@{vulnerability['vulnerability']['namespace']},{vulnerability['vulnerability']['severity']}", + flags=TE, + ): + note(vulnerability) + critical_levels = set(["HIGH", "CRITICAL"]) + if vulnerability['vulnerability']["severity"].upper() in critical_levels: + with Then( + f"Found vulnerability of 
{vulnerability['vulnerability']['severity']} severity" + ): + result(Fail) + + +if main(): + docker_vulnerabilities() diff --git a/.github/grype/run_grype_scan.sh b/.github/grype/run_grype_scan.sh new file mode 100755 index 000000000000..c5ce0b1b10d3 --- /dev/null +++ b/.github/grype/run_grype_scan.sh @@ -0,0 +1,18 @@ +set -x +set -e + +IMAGE=$1 + +GRYPE_VERSION="v0.80.1" + +docker pull $IMAGE +docker pull anchore/grype:${GRYPE_VERSION} + +docker run \ + --rm --volume /var/run/docker.sock:/var/run/docker.sock \ + --name Grype anchore/grype:${GRYPE_VERSION} \ + --scope all-layers \ + -o json \ + $IMAGE > result.json + +ls -sh diff --git a/.github/grype/transform_and_upload_results_s3.sh b/.github/grype/transform_and_upload_results_s3.sh new file mode 100755 index 000000000000..7a10b02887ef --- /dev/null +++ b/.github/grype/transform_and_upload_results_s3.sh @@ -0,0 +1,13 @@ +DOCKER_IMAGE=$(echo "$DOCKER_IMAGE" | sed 's/[\/:]/_/g') + +S3_PATH="s3://$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +HTTPS_S3_PATH="https://s3.amazonaws.com/$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +echo "https_s3_path=$HTTPS_S3_PATH" >> $GITHUB_OUTPUT + +tfs --no-colors transform nice raw.log nice.log.txt +tfs --no-colors report results -a $HTTPS_S3_PATH raw.log - --copyright "Altinity LTD" | tfs --no-colors document convert > results.html + +aws s3 cp --no-progress nice.log.txt $S3_PATH/nice.log.txt --content-type "text/plain; charset=utf-8" || echo "nice log file not found". +aws s3 cp --no-progress results.html $S3_PATH/results.html || echo "results file not found". +aws s3 cp --no-progress raw.log $S3_PATH/raw.log || echo "raw.log file not found". +aws s3 cp --no-progress result.json $S3_PATH/result.json --content-type "text/plain; charset=utf-8" || echo "result.json not found". \ No newline at end of file diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml new file mode 100644 index 000000000000..1414129fd666 --- /dev/null +++ b/.github/workflows/grype_scan.yml @@ -0,0 +1,131 @@ +name: Grype Scan +run-name: Grype Scan ${{ inputs.docker_image }} + +on: + workflow_dispatch: + # Inputs for manual run + inputs: + docker_image: + description: 'Docker image. If no tag, it will be determined by version_helper.py' + required: true + workflow_call: + # Inputs for workflow call + inputs: + docker_image: + description: 'Docker image. 
If no tag, it will be determined by version_helper.py' + required: true + type: string +env: + PYTHONUNBUFFERED: 1 + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + grype_scan: + name: Grype Scan + runs-on: [self-hosted, altinity-on-demand, altinity-func-tester-aarch64] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker + uses: docker/setup-buildx-action@v3 + + - name: Set up Python + run: | + export TESTFLOWS_VERSION="2.4.19" + sudo apt-get update + sudo apt-get install -y python3-pip python3-venv + python3 -m venv venv + source venv/bin/activate + pip install --upgrade requests chardet urllib3 + pip install testflows==$TESTFLOWS_VERSION awscli==1.33.28 + echo PATH=$PATH >>$GITHUB_ENV + + - name: Set image tag if not given + if: ${{ !contains(inputs.docker_image, ':') }} + id: set_version + run: | + python3 ./tests/ci/version_helper.py | tee /tmp/version_info + source /tmp/version_info + echo "docker_image=${{ inputs.docker_image }}:${{ github.event.pull_request.number || 0 }}-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT + echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT + + - name: Run Grype Scan + run: | + DOCKER_IMAGE=${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + ./.github/grype/run_grype_scan.sh $DOCKER_IMAGE + + - name: Parse grype results + run: | + python3 -u ./.github/grype/parse_vulnerabilities_grype.py -o nice --no-colors --log raw.log --test-to-end + + - name: Transform and Upload Grype Results + if: always() + id: upload_results + env: + S3_BUCKET: "altinity-build-artifacts" + COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} + DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + run: | + ./.github/grype/transform_and_upload_results_s3.sh + + - name: Create step summary + if: always() + id: create_summary + run: | + jq -r '.distro | "**Distro**: \(.name):\(.version)"' result.json >> $GITHUB_STEP_SUMMARY + if jq -e '.matches | length == 0' result.json > /dev/null; then + echo "No CVEs" >> $GITHUB_STEP_SUMMARY + else + echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY + echo "|------------|-------|" >> $GITHUB_STEP_SUMMARY + jq -r ' + .matches | + map(.vulnerability.severity) | + group_by(.) | + map({severity: .[0], count: length}) | + sort_by(.severity) | + map("| \(.severity) | \(.count) |") | + .[] + ' result.json >> $GITHUB_STEP_SUMMARY + fi + + HIGH_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. == "High")) | length' result.json) + CRITICAL_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. 
== "Critical")) | length' result.json) + TOTAL_HIGH_CRITICAL=$((HIGH_COUNT + CRITICAL_COUNT)) + echo "total_high_critical=$TOTAL_HIGH_CRITICAL" >> $GITHUB_OUTPUT + + if [ $TOTAL_HIGH_CRITICAL -gt 0 ]; then + echo '## High and Critical vulnerabilities found' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat raw.log | tfs --no-colors show tests | grep -Pi 'High|Critical' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + fi + + - name: Set commit status + if: always() + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: '${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}', + state: '${{ steps.create_summary.outputs.total_high_critical > 0 && 'failure' || 'success' }}', + target_url: '${{ steps.upload_results.outputs.https_s3_path }}/results.html', + description: 'Grype Scan Completed with ${{ steps.create_summary.outputs.total_high_critical }} high/critical vulnerabilities', + context: 'Grype Scan ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}' + }) + + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: grype-results-${{ hashFiles('raw.log') }} + path: | + result.json + nice.log.txt diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 2a2eeecc0fad..870945efee78 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -181,7 +181,7 @@ jobs: suffix: '-alpine' - image: keeper suffix: '' - uses: ./.github/workflows/grype_scan.yml@antalya + uses: ./.github/workflows/grype_scan.yml secrets: inherit with: docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} From 9f27dca882f2081aabc5c6d20d0e0ed38cb1d5a5 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Thu, 17 Apr 2025 14:23:37 -0400 Subject: [PATCH 19/24] add missing GITHUB_TOKEN --- .github/workflows/release_branches.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 479823f54628..166421d818d5 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -573,6 +573,7 @@ jobs: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} From 377556249326ba1b3f0b72602ffbe0118b373e78 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 18 Apr 2025 09:55:48 -0400 Subject: [PATCH 20/24] Add matrix values to job name attr in regression --- .github/workflows/regression.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 1142a1310867..96692fb49174 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -179,7 
+179,7 @@ jobs: python3 -u ${{ env.SUITE }}/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.SUITE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -243,7 +243,7 @@ jobs: -u alter/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} --only "/alter/${{ matrix.ONLY }} partition/*" - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.ONLY }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -314,7 +314,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -436,7 +436,7 @@ jobs: python3 -u ${{ env.SUITE }}/regression.py --clickhouse-binary-path ${{ env.clickhouse_path }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" 
repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.SUITE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -562,7 +562,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -633,7 +633,7 @@ jobs: --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE @@ -703,7 +703,7 @@ jobs: --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} --with-${{ matrix.STORAGE }} - --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT 
job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name="$GITHUB_JOB (${{ matrix.STORAGE }})" job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)" ${{ env.args }} || EXITCODE=$?; .github/add_link_to_logs.sh; exit $EXITCODE From cd66e5e046a779d7976c3f204d228b549483c0d6 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 18 Apr 2025 11:23:58 -0400 Subject: [PATCH 21/24] Don't halt on suspicious, but not sensitive, strings. Too many false positives --- tests/ci/s3_helper.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index a473a108acd6..71545eca643b 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -20,14 +20,16 @@ S3_URL, ) -sensitive_var_pattern = re.compile( - r"\b[A-Z_]*(? Date: Sat, 19 Apr 2025 17:18:41 -0400 Subject: [PATCH 22/24] support pagination to fix missing CI jobs status --- .github/create_workflow_report.py | 37 ++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index 4257925b826d..7db00ddd8f44 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -198,6 +198,7 @@ def get_commit_statuses(sha: str) -> pd.DataFrame: """ Fetch commit statuses for a given SHA and return as a pandas DataFrame. + Handles pagination to get all statuses. Args: sha (str): Commit SHA to fetch statuses for. 
@@ -210,15 +211,35 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
         "Accept": "application/vnd.github.v3+json",
     }
 
-    url = f"https://api.github.com/repos/Altinity/ClickHouse/commits/{sha}/statuses"
-    response = requests.get(url, headers=headers)
+    url = f"https://api.github.com/repos/{GITHUB_REPO}/commits/{sha}/statuses"
 
-    if response.status_code != 200:
-        raise Exception(
-            f"Failed to fetch statuses: {response.status_code} {response.text}"
-        )
+    all_data = []
+
+    while url:
+        response = requests.get(url, headers=headers)
+
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to fetch statuses: {response.status_code} {response.text}"
+            )
+
+        data = response.json()
+        all_data.extend(data)
+
+        # Check for pagination links in the response headers
+        if "Link" in response.headers:
+            links = response.headers["Link"].split(",")
+            next_url = None
+
+            for link in links:
+                parts = link.strip().split(";")
+                if len(parts) == 2 and 'rel="next"' in parts[1]:
+                    next_url = parts[0].strip("<>")
+                    break
-    data = response.json()
+            url = next_url
+        else:
+            url = None
 
     # Parse relevant fields
     parsed = [
@@ -228,7 +249,7 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
             "message": item["description"],
             "results_link": item["target_url"],
         }
-        for item in data
+        for item in all_data
     ]
 
     return (

From 70d617a0ddd91f43790f508f1c44b6af2ab21542 Mon Sep 17 00:00:00 2001
From: strtgbb <146047128+strtgbb@users.noreply.github.com>
Date: Tue, 22 Apr 2025 08:41:03 -0400
Subject: [PATCH 23/24] try to fix branch filter

---
 .github/workflows/release_branches.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 166421d818d5..6fb776b11c8f 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -25,7 +25,8 @@ on:  # yamllint disable-line rule:truthy
       - prereleased
   push:
     branches:
-      - '**/24.3*'
+      - 'releases/24.3*'
+      - 'customizations/24.3*'
     tags:
       - '*'
   workflow_dispatch:

From e391aa90e284daf881ed523f8c67f2dfc73463c1 Mon Sep 17 00:00:00 2001
From: strtgbb <146047128+strtgbb@users.noreply.github.com>
Date: Thu, 24 Apr 2025 11:16:46 -0400
Subject: [PATCH 24/24] fix usage of GITHUB_REPO in report generation

---
 .github/create_workflow_report.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py
index 7db00ddd8f44..618ee98988fd 100755
--- a/.github/create_workflow_report.py
+++ b/.github/create_workflow_report.py
@@ -17,6 +17,7 @@
 DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
 DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
 S3_BUCKET = "altinity-build-artifacts"
+GITHUB_REPO = "Altinity/ClickHouse"
 
 css = """
@@ -274,7 +275,7 @@ def get_pr_info_from_number(pr_number: str) -> dict:
         "Accept": "application/vnd.github.v3+json",
     }
 
-    url = f"https://api.github.com/repos/Altinity/ClickHouse/pulls/{pr_number}"
+    url = f"https://api.github.com/repos/{GITHUB_REPO}/pulls/{pr_number}"
     response = requests.get(url, headers=headers)
 
     if response.status_code != 200:
@@ -544,7 +545,7 @@ def main():
     else:
         try:
             pr_info = get_pr_info_from_number(args.pr_number)
-            pr_info_html = f"""
+            pr_info_html = f"""
             #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')})
             {pr_info.get("title")}
             """
         except Exception as e:
@@ -582,7 +583,7 @@ def main():
             Workflow Run{args.actions_run_url.split('/')[-1]}
-            Commit{args.commit_sha}
+            Commit{args.commit_sha}
             Date{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC
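The final patch stops hard-coding the Altinity/ClickHouse slug in each URL the report builds and reuses a single GITHUB_REPO constant instead. A minimal sketch of that pattern, for reference only; the GITHUB_REPOSITORY environment fallback and the helper names are assumptions made for illustration, not something the patch itself adds.

# Sketch only: one repo slug shared by every URL builder in the report script.
import os

GITHUB_REPO = os.getenv("GITHUB_REPOSITORY", "Altinity/ClickHouse")


def pr_api_url(pr_number: int) -> str:
    return f"https://api.github.com/repos/{GITHUB_REPO}/pulls/{pr_number}"


def statuses_api_url(sha: str) -> str:
    return f"https://api.github.com/repos/{GITHUB_REPO}/commits/{sha}/statuses"


def commit_html_url(sha: str) -> str:
    # Used for the "Commit" row of the report header.
    return f"https://github.com/{GITHUB_REPO}/commit/{sha}"

Keeping every URL builder on the same constant means a fork only has to change one value for the API calls and the HTML links to stay consistent.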