Skip to content

Commit 6c7bc5c

Browse files
authored
[CI][Bench] Create summary reports for benchmarking CI run results (#19733)
It is much easier to figure out what's wrong with a benchmarking CI run when its summary tells you what's wrong immediately, e.g.: https://github.com/intel/llvm/actions/runs/16789472825
1 parent 0d26010 commit 6c7bc5c

File tree

3 files changed

+82
-5
lines changed

3 files changed

+82
-5
lines changed

devops/actions/run-tests/benchmark/action.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,15 +213,19 @@ runs:
213213
--compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \
214214
--results-dir "./llvm-ci-perf-results/results/" \
215215
--regression-filter '^[a-z_]+_sycl ' \
216+
--regression-filter-type 'SYCL' \
216217
--verbose \
218+
--produce-github-summary \
217219
${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \
218220

219221
echo "-----"
220222

221-
- name: Cache changes to benchmark folder for archival purposes
223+
- name: Cache changes and upload github summary
222224
if: always()
223225
shell: bash
224226
run: |
227+
[ -f "github_summary.md" ] && cat github_summary.md >> $GITHUB_STEP_SUMMARY
228+
225229
cd "./llvm-ci-perf-results"
226230
git add .
227231
for diff in $(git diff HEAD --name-only); do

devops/scripts/benchmarks/compare.py

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -340,14 +340,25 @@ def to_hist(
340340
parser_avg.add_argument(
341341
"--regression-filter",
342342
type=str,
343-
help="If provided, only regressions matching provided regex will cause exit status 1.",
343+
help="If provided, only regressions in tests matching provided regex will cause exit status 1.",
344344
default=None,
345345
)
346+
parser_avg.add_argument(
347+
"--regression-filter-type",
348+
type=str,
349+
help="Name to use in logging for tests that fall within the filter defined by --regression-filter; i.e. if --regression-filter filters for SYCL benchmarks, --regression-filter-type could be 'SYCL'.",
350+
default="filtered",
351+
)
346352
parser_avg.add_argument(
347353
"--dry-run",
348354
action="store_true",
349355
help="Do not return error upon regressions.",
350356
)
357+
parser_avg.add_argument(
358+
"--produce-github-summary",
359+
action="store_true",
360+
help=f"Create a summary file '{options.github_summary_filename}' for Github workflow summaries.",
361+
)
351362

352363
args = parser.parse_args()
353364

@@ -365,6 +376,14 @@ def to_hist(
365376
args.avg_type, args.name, args.compare_file, args.results_dir, args.cutoff
366377
)
367378

379+
# Initialize Github summary variables:
380+
if args.produce_github_summary:
381+
gh_summary = []
382+
383+
filter_type_capitalized = (
384+
args.regression_filter_type[0].upper() + args.regression_filter_type[1:]
385+
)
386+
368387
# Not all regressions are of concern: if a filter is provided, filter
369388
# regressions using filter
370389
regressions_ignored = []
@@ -378,7 +397,7 @@ def to_hist(
378397
regressions_ignored.append(test)
379398

380399
def print_regression(entry: dict, is_warning: bool = False):
381-
"""Print an entry outputted from Compare.to_hist
400+
"""Print an entry outputted from Compare.to_hist()
382401
383402
Args:
384403
entry (dict): The entry to print
@@ -390,28 +409,80 @@ def print_regression(entry: dict, is_warning: bool = False):
390409
log_func(f"-- Run result: {entry['value']}")
391410
log_func(f"-- Delta: {entry['delta']}")
392411
log_func("")
412+
if args.produce_github_summary:
413+
gh_summary.append(f"#### {entry['name']}:")
414+
gh_summary.append(
415+
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
416+
)
417+
gh_summary.append(f"- Run result: {entry['value']}")
418+
gh_summary.append(
419+
# Since we are dealing with floats, our deltas have a lot
420+
# of decimal places. For easier readability, we round our
421+
# deltas and format our Github summary output as:
422+
#
423+
# Delta: <rounded number>% (<full number>)
424+
#
425+
f"- Delta: {round(entry['delta']*100, 2)}% ({entry['delta']})"
426+
)
427+
gh_summary.append("")
393428

394429
if improvements:
395430
log.info("#")
396431
log.info("# Improvements:")
397432
log.info("#")
433+
if args.produce_github_summary:
434+
gh_summary.append(f"### Improvements")
435+
gh_summary.append(
436+
f"<details><summary>{len(improvements)} improved tests:</summary>"
437+
)
438+
gh_summary.append("")
398439
for test in improvements:
399440
print_regression(test)
441+
if args.produce_github_summary:
442+
gh_summary.append("</details>")
443+
gh_summary.append("")
400444
if regressions_ignored:
401445
log.info("#")
402-
log.info("# Regressions (filtered out by regression-filter):")
446+
log.info("# Regressions (filtered out by --regression-filter):")
403447
log.info("#")
448+
if args.produce_github_summary:
449+
gh_summary.append(f"### Non-{filter_type_capitalized} Regressions")
450+
gh_summary.append(
451+
f"<details><summary>{len(regressions_ignored)} non-{args.regression_filter_type} regressions:</summary>"
452+
)
453+
gh_summary.append("")
404454
for test in regressions_ignored:
405455
print_regression(test)
456+
if args.produce_github_summary:
457+
gh_summary.append("</details>")
458+
gh_summary.append("")
406459
if regressions_of_concern:
407460
log.warning("#")
408461
log.warning("# Regressions:")
409462
log.warning("#")
463+
if args.produce_github_summary:
464+
gh_summary.append(f"### {filter_type_capitalized} Regressions")
465+
gh_summary.append(
466+
f"{len(regressions_of_concern)} {args.regression_filter_type} regressions. These regressions warrant a CI failure:"
467+
)
468+
gh_summary.append("")
410469
for test in regressions_of_concern:
411470
print_regression(test, is_warning=True)
471+
if args.produce_github_summary:
472+
gh_summary.append("")
473+
412474
if not args.dry_run:
413-
exit(1) # Exit 1 to trigger github test failure
475+
if args.produce_github_summary:
476+
with open(options.github_summary_filename, "w") as f:
477+
f.write("\n".join(gh_summary))
478+
exit(1) # Exit 1 to trigger Github test failure
479+
414480
log.info("No unexpected regressions found!")
481+
if args.produce_github_summary:
482+
gh_summary.append("No unexpected regressions found!")
483+
with open(options.github_summary_filename, "w") as f:
484+
f.write("\n".join(gh_summary))
485+
415486
else:
416487
log.error("Unsupported operation: exiting.")
417488
exit(1)

devops/scripts/benchmarks/options.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ class Options:
9292
# CI scripts vs SYCL build source.
9393
github_repo_override: str = None
9494
git_commit_override: str = None
95+
# Filename used to store Github summary files:
96+
github_summary_filename: str = "github_summary.md"
9597
# Archiving settings
9698
# Archived runs are stored separately from the main dataset but are still accessible
9799
# via the HTML UI when "Include archived runs" is enabled.

0 commit comments

Comments
 (0)