From 128434db549aa3e40af58a3feccf4ad68cd0c8db Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Fri, 8 Nov 2024 14:16:06 +0000 Subject: [PATCH 1/4] ci: Add initial regression test workflow --- .github/workflows/ci-aarch64.yml | 78 +++++++++++++++++++++++++ tests/regression/bench_regression.sh | 24 ++++++++ tests/regression/benchdnn_comparison.py | 75 ++++++++++++++++++++++++ tests/regression/consistency_check.sh | 26 +++++++++ tests/regression/inputs/conv | 22 +++++++ tests/regression/inputs/matmul | 19 ++++++ 6 files changed, 244 insertions(+) create mode 100644 tests/regression/bench_regression.sh create mode 100644 tests/regression/benchdnn_comparison.py create mode 100755 tests/regression/consistency_check.sh create mode 100644 tests/regression/inputs/conv create mode 100644 tests/regression/inputs/matmul diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml index 108ef6e9864..4df351353ea 100644 --- a/.github/workflows/ci-aarch64.yml +++ b/.github/workflows/ci-aarch64.yml @@ -111,6 +111,11 @@ jobs: with: version: ${{ fromJson(steps.get-versions.outputs.output).dependencies.clang }} + - name: setup python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Clone ACL run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh env: @@ -161,6 +166,79 @@ jobs: CTEST_PARALLEL_LEVEL: 6 DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build ONEDNN_THREADING: ${{ matrix.config.threading }} + + ## Regression test steps ## + - name: Checkout oneDNN main + if: ${{ matrix.config.build == 'Release' }} + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + ref: main + path: oneDNN_main + + # TODO :: Create separate pipeline to cache oneDNN main + - name: Configure oneDNN main + if: ${{ matrix.config.build == 'Release' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh + working-directory: ${{ github.workspace }}/oneDNN_main + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: 13 + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN main + if: ${{ matrix.config.build == 'Release' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh + working-directory: ${{ github.workspace }}/oneDNN_main + env: + ONEDNN_ACTION: build + + - shell: bash + if: ${{ matrix.config.build == 'Release' }} + run: | + bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_1.txt + bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_2.txt + env: + OMP_NUM_THREADS: 4 + DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build + + - name: Compare consistency check results + if: ${{ matrix.config.build == 'Release' }} + id: consistency-check + continue-on-error: true + run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py consistency_1.txt consistency_2.txt + + - shell: bash + if: ${{ matrix.config.build == 'Release' }} + run: | + OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt + OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt + OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt + OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt + env: + DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build + + - name: Compare regression test results + if: ${{ matrix.config.build == 'Release' }} + id: regression-test + continue-on-error: true + run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt + + - name: Check consistency-check failure + if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass != 'True' && steps.regression-test.outputs.pass != 'True' }} + run: | + echo "::warnings title=consistency-check-failure::consistency check on main failed, ignoring regression test results!" + + - name: Check regression test failure + if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass == 'True' && steps.regression-test.outputs.pass != 'True' }} + run: | + echo "::error title=regression-test-failure::some regression tests failed. Check the compare regression test results step for more details!" + exit 1 + # This job adds a check named "CI AArch64" that represents overall # workflow status and can be used in branch rulesets status: diff --git a/tests/regression/bench_regression.sh b/tests/regression/bench_regression.sh new file mode 100644 index 00000000000..e83290f87c5 --- /dev/null +++ b/tests/regression/bench_regression.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +# Usage: bash bench_regression.sh {benchdnn_executable} + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +$1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul +$1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv \ No newline at end of file diff --git a/tests/regression/benchdnn_comparison.py b/tests/regression/benchdnn_comparison.py new file mode 100644 index 00000000000..ff7b6d75ed9 --- /dev/null +++ b/tests/regression/benchdnn_comparison.py @@ -0,0 +1,75 @@ +#!/usr/bin/python3 + +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +import sys +import os + + +def compare_two_benchdnn(file1, file2, tolerance=0.05): + """ + Compare two benchdnn output files + """ + with open(file1) as f: + r1 = f.readlines() + + with open(file2) as f: + r2 = f.readlines() + + # Trim non-formatted lines and split the prolem from time + r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"] + r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"] + + # Convert to dict and trim \n + r1 = [(x[0], float(x[1][:-1])) for x in r1] + r2 = [(x[0], float(x[1][:-1])) for x in r2] + + if len(r1) != len(r2): + raise Exception("The number of benchdnn runs do not match") + + print("%prb%,%-time(old)%,%-time(new)%,%passed%") + + passed = True + failed_tests = [] + for idx, item in enumerate(r1): + prb, time1 = item + if prb != r2[idx][0]: + raise Exception(f"{prb} exists in {file1} but not {file2}") + + res_str = f"{prb}, {time1}, {r2[idx][1]}" + print(res_str) + + if time1 != 0: # Incompatible tests would return 0 so avoid division by 0 + test_pass = (r2[idx][1] - time1) / time1 < tolerance + if not test_pass: + failed_tests.append(res_str) + passed = False + + if "GITHUB_OUTPUT" in os.environ: + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"pass={passed}") + + if passed: + print("Regression tests passed") + else: + print("\n----The following tests did not pass:----") + print("\n".join(failed_tests) + "\n") + raise Exception("Some regression tests did not pass") + +if __name__ == "__main__": + compare_two_benchdnn(sys.argv[1], sys.argv[2]) diff --git a/tests/regression/consistency_check.sh b/tests/regression/consistency_check.sh new file mode 100755 index 00000000000..adf19ab952a --- /dev/null +++ b/tests/regression/consistency_check.sh @@ -0,0 +1,26 @@ +#! /bin/bash + +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +# Used for checking the fluctations in performance of a github actions runner +# before performing the actual regression tests +# +# Usage: bash consistency_check.sh {benchdnn_executable} + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +$1 --matmul --mode=P --repeats-per-prb=20 --perf-template=%prb%,%-time% --dt=f32 128x300:300x128 diff --git a/tests/regression/inputs/conv b/tests/regression/inputs/conv new file mode 100644 index 00000000000..f60f7654074 --- /dev/null +++ b/tests/regression/inputs/conv @@ -0,0 +1,22 @@ +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +# From Resnet +--reset +--dir=FWD_D +--dt=f32 +mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0 \ No newline at end of file diff --git a/tests/regression/inputs/matmul b/tests/regression/inputs/matmul new file mode 100644 index 00000000000..5b3b2bf872c --- /dev/null +++ b/tests/regression/inputs/matmul @@ -0,0 +1,19 @@ +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* +--reset +--dt=bf16,f32 +384x1x384:384x384 \ No newline at end of file From a60a9b727f989825183a2c06039951ff39cfe39f Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Thu, 23 Jan 2025 17:06:08 +0000 Subject: [PATCH 2/4] ci: use t-test for regression testing --- .github/workflows/ci-aarch64.yml | 38 ++++++---------------- tests/regression/bench_regression.sh | 12 +++++-- tests/regression/benchdnn_comparison.py | 42 ++++++++++++++----------- tests/regression/consistency_check.sh | 26 --------------- 4 files changed, 42 insertions(+), 76 deletions(-) mode change 100644 => 100755 tests/regression/bench_regression.sh delete mode 100755 tests/regression/consistency_check.sh diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml index 4df351353ea..49c56eff5eb 100644 --- a/.github/workflows/ci-aarch64.yml +++ b/.github/workflows/ci-aarch64.yml @@ -116,6 +116,10 @@ jobs: with: python-version: '3.10' + - name: Install scipy + if: ${{ matrix.config.build == 'Release' }} + run: pip install scipy + - name: Clone ACL run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh env: @@ -200,25 +204,8 @@ jobs: - shell: bash if: ${{ matrix.config.build == 'Release' }} run: | - bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_1.txt - bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_2.txt - env: - OMP_NUM_THREADS: 4 - DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build - - - name: Compare consistency check results - if: ${{ matrix.config.build == 'Release' }} - id: consistency-check - continue-on-error: true - run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py consistency_1.txt consistency_2.txt - - - shell: bash - if: ${{ matrix.config.build == 'Release' }} - run: | - OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt - OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt - OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt - OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt + OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt + OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt env: DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build @@ -228,17 +215,10 @@ jobs: continue-on-error: true run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt - - name: Check consistency-check failure - if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass != 'True' && steps.regression-test.outputs.pass != 'True' }} - run: | - echo "::warnings title=consistency-check-failure::consistency check on main failed, ignoring regression test results!" - - name: Check regression test failure - if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass == 'True' && steps.regression-test.outputs.pass != 'True' }} - run: | - echo "::error title=regression-test-failure::some regression tests failed. Check the compare regression test results step for more details!" - exit 1 - + if: ${{ matrix.config.build == 'Release' && steps.regression-test.outputs.pass == 'True' }} + run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.regression-test.outputs.message }}" + # This job adds a check named "CI AArch64" that represents overall # workflow status and can be used in branch rulesets status: diff --git a/tests/regression/bench_regression.sh b/tests/regression/bench_regression.sh old mode 100644 new mode 100755 index e83290f87c5..b826f9bc270 --- a/tests/regression/bench_regression.sh +++ b/tests/regression/bench_regression.sh @@ -17,8 +17,14 @@ # limitations under the License. # ******************************************************************************* -# Usage: bash bench_regression.sh {benchdnn_executable} +# Usage: bash bench_regression.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file} SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -$1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul -$1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv \ No newline at end of file + +for i in {1..5} +do + $1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $3 + $2 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $4 + $1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $3 + $2 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $4 +done \ No newline at end of file diff --git a/tests/regression/benchdnn_comparison.py b/tests/regression/benchdnn_comparison.py index ff7b6d75ed9..8bb918109ab 100644 --- a/tests/regression/benchdnn_comparison.py +++ b/tests/regression/benchdnn_comparison.py @@ -19,6 +19,8 @@ import sys import os +from collections import defaultdict +from scipy.stats import ttest_ind def compare_two_benchdnn(file1, file2, tolerance=0.05): @@ -35,30 +37,30 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05): r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"] r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"] - # Convert to dict and trim \n - r1 = [(x[0], float(x[1][:-1])) for x in r1] - r2 = [(x[0], float(x[1][:-1])) for x in r2] - if len(r1) != len(r2): raise Exception("The number of benchdnn runs do not match") - print("%prb%,%-time(old)%,%-time(new)%,%passed%") + # Convert to dict and trim \n + r1_samples = defaultdict(list) + r2_samples = defaultdict(list) + + for k, v in r1: + r1_samples[k].append(float(v[:-1])) + for k, v in r2: + r2_samples[k].append(float(v[:-1])) passed = True failed_tests = [] - for idx, item in enumerate(r1): - prb, time1 = item - if prb != r2[idx][0]: + for prb, r1_times in r1_samples.items(): + if prb not in r2_samples: raise Exception(f"{prb} exists in {file1} but not {file2}") + r2_times = r2_samples[prb] - res_str = f"{prb}, {time1}, {r2[idx][1]}" - print(res_str) + res = ttest_ind(r2_times, r1_times, alternative='greater') - if time1 != 0: # Incompatible tests would return 0 so avoid division by 0 - test_pass = (r2[idx][1] - time1) / time1 < tolerance - if not test_pass: - failed_tests.append(res_str) - passed = False + if res.pvalue < 0.05: + failed_tests.append(prb) + passed = False if "GITHUB_OUTPUT" in os.environ: with open(os.environ["GITHUB_OUTPUT"], "a") as f: @@ -67,9 +69,13 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05): if passed: print("Regression tests passed") else: - print("\n----The following tests did not pass:----") - print("\n".join(failed_tests) + "\n") - raise Exception("Some regression tests did not pass") + message = "\n----The following regression tests failed:----\n" + \ + "\n".join(failed_tests) + "\n" + if "GITHUB_OUTPUT" in os.environ: + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"message={message}") + print(message) + raise Exception("Some regression tests failed") if __name__ == "__main__": compare_two_benchdnn(sys.argv[1], sys.argv[2]) diff --git a/tests/regression/consistency_check.sh b/tests/regression/consistency_check.sh deleted file mode 100755 index adf19ab952a..00000000000 --- a/tests/regression/consistency_check.sh +++ /dev/null @@ -1,26 +0,0 @@ -#! /bin/bash - -# ******************************************************************************* -# Copyright 2025 Arm Limited and affiliates. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ******************************************************************************* - -# Used for checking the fluctations in performance of a github actions runner -# before performing the actual regression tests -# -# Usage: bash consistency_check.sh {benchdnn_executable} - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -$1 --matmul --mode=P --repeats-per-prb=20 --perf-template=%prb%,%-time% --dt=f32 128x300:300x128 From 3a74b4e56dcc23f912436ef190c8679a6b456316 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Mon, 27 Jan 2025 16:44:19 +0000 Subject: [PATCH 3/4] ci: refactor regression tests --- .../performance/bench_performance.sh | 2 +- .../performance}/benchdnn_comparison.py | 0 .../automation/performance}/inputs/conv | 0 .../automation/performance}/inputs/matmul | 0 .github/workflows/ci-aarch64.yml | 49 +++++++------------ .github/workflows/nightly-aarch64.yml | 1 + 6 files changed, 20 insertions(+), 32 deletions(-) rename tests/regression/bench_regression.sh => .github/automation/performance/bench_performance.sh (90%) rename {tests/regression => .github/automation/performance}/benchdnn_comparison.py (100%) rename {tests/regression => .github/automation/performance}/inputs/conv (100%) rename {tests/regression => .github/automation/performance}/inputs/matmul (100%) diff --git a/tests/regression/bench_regression.sh b/.github/automation/performance/bench_performance.sh similarity index 90% rename from tests/regression/bench_regression.sh rename to .github/automation/performance/bench_performance.sh index b826f9bc270..f0299b1b76b 100755 --- a/tests/regression/bench_regression.sh +++ b/.github/automation/performance/bench_performance.sh @@ -17,7 +17,7 @@ # limitations under the License. # ******************************************************************************* -# Usage: bash bench_regression.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file} +# Usage: bash bench_performance.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file} SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) diff --git a/tests/regression/benchdnn_comparison.py b/.github/automation/performance/benchdnn_comparison.py similarity index 100% rename from tests/regression/benchdnn_comparison.py rename to .github/automation/performance/benchdnn_comparison.py diff --git a/tests/regression/inputs/conv b/.github/automation/performance/inputs/conv similarity index 100% rename from tests/regression/inputs/conv rename to .github/automation/performance/inputs/conv diff --git a/tests/regression/inputs/matmul b/.github/automation/performance/inputs/matmul similarity index 100% rename from tests/regression/inputs/matmul rename to .github/automation/performance/inputs/matmul diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml index 49c56eff5eb..1328c7dd970 100644 --- a/.github/workflows/ci-aarch64.yml +++ b/.github/workflows/ci-aarch64.yml @@ -20,18 +20,6 @@ name: "CI AArch64" #* To avoid duplicate jobs running when both push and PR is satisfied, we use this: #* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753 on: - push: - branches: [main, "rls-*"] - paths: - - ".github/**" - - "cmake/**" - - "examples/**" - - "include/**" - - "src/common/**" - - "src/cpu/*" - - "src/cpu/aarch64/**" - - "tests/**" - - "CMakeLists.txt" pull_request: types: [opened, synchronize, reopened] paths: @@ -48,10 +36,8 @@ on: workflow_dispatch: #* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844 -#* Does not apply to the main branch. concurrency: group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} # Declare default permissions as read only. permissions: read-all @@ -87,6 +73,7 @@ jobs: content="${content//[$'\t\r\n$ ']}" echo "output=$content" >> $GITHUB_OUTPUT + # Note: This will create a github actions cache - name: Get latest CMake and Ninja uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5 with: @@ -171,19 +158,19 @@ jobs: DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build ONEDNN_THREADING: ${{ matrix.config.threading }} - ## Regression test steps ## - - name: Checkout oneDNN main + ## Performance test steps ## + - name: Checkout oneDNN base if: ${{ matrix.config.build == 'Release' }} uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - ref: main - path: oneDNN_main + ref: ${{ github.base_ref }} + path: oneDNN_base - # TODO :: Create separate pipeline to cache oneDNN main - - name: Configure oneDNN main + # TODO :: Create separate pipeline to cache oneDNN base + - name: Configure oneDNN base if: ${{ matrix.config.build == 'Release' }} run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh - working-directory: ${{ github.workspace }}/oneDNN_main + working-directory: ${{ github.workspace }}/oneDNN_base env: ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary BUILD_TOOLSET: ${{ matrix.config.toolset }} @@ -194,30 +181,30 @@ jobs: ONEDNN_TEST_SET: ${{ matrix.config.testset }} ONEDNN_THREADING: ${{ matrix.config.threading }} - - name: Build oneDNN main + - name: Build oneDNN base if: ${{ matrix.config.build == 'Release' }} run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh - working-directory: ${{ github.workspace }}/oneDNN_main + working-directory: ${{ github.workspace }}/oneDNN_base env: ONEDNN_ACTION: build - shell: bash if: ${{ matrix.config.build == 'Release' }} run: | - OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt - OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt + OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt + OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt env: DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build - - name: Compare regression test results + - name: Compare performance test results if: ${{ matrix.config.build == 'Release' }} - id: regression-test + id: performance-test continue-on-error: true - run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt + run: python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt - - name: Check regression test failure - if: ${{ matrix.config.build == 'Release' && steps.regression-test.outputs.pass == 'True' }} - run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.regression-test.outputs.message }}" + - name: Check performance test failure + if: ${{ matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }} + run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}" # This job adds a check named "CI AArch64" that represents overall # workflow status and can be used in branch rulesets diff --git a/.github/workflows/nightly-aarch64.yml b/.github/workflows/nightly-aarch64.yml index 14e03718500..f026ab36e34 100644 --- a/.github/workflows/nightly-aarch64.yml +++ b/.github/workflows/nightly-aarch64.yml @@ -56,6 +56,7 @@ jobs: with: path: oneDNN + # Note: This will create a github actions cache - name: Get latest CMake and Ninja uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5 with: From 79ef6f01921f48d377319eb3e74844d713f237a7 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Wed, 5 Feb 2025 11:17:27 +0000 Subject: [PATCH 4/4] ci: initial regression test --- .../performance/bench_performance.sh | 2 +- .../performance/benchdnn_comparison.py | 12 ++++-- .github/automation/performance/inputs/conv | 2 +- .github/automation/performance/inputs/matmul | 2 +- .github/workflows/ci-aarch64.yml | 37 +++++++++++++------ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/.github/automation/performance/bench_performance.sh b/.github/automation/performance/bench_performance.sh index f0299b1b76b..ff88cd455f0 100755 --- a/.github/automation/performance/bench_performance.sh +++ b/.github/automation/performance/bench_performance.sh @@ -27,4 +27,4 @@ do $2 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $4 $1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $3 $2 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $4 -done \ No newline at end of file +done diff --git a/.github/automation/performance/benchdnn_comparison.py b/.github/automation/performance/benchdnn_comparison.py index 8bb918109ab..71326076dac 100644 --- a/.github/automation/performance/benchdnn_comparison.py +++ b/.github/automation/performance/benchdnn_comparison.py @@ -33,14 +33,15 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05): with open(file2) as f: r2 = f.readlines() - # Trim non-formatted lines and split the prolem from time + # Trim non-formatted lines and split the problem from time r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"] r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"] + if (len(r1) == 0) or (len(r2) == 0): + raise Exception("One or both of the test results have zero lines") if len(r1) != len(r2): raise Exception("The number of benchdnn runs do not match") - # Convert to dict and trim \n r1_samples = defaultdict(list) r2_samples = defaultdict(list) @@ -62,9 +63,11 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05): failed_tests.append(prb) passed = False + print(prb + (" passed" if passed else " failed")) + if "GITHUB_OUTPUT" in os.environ: with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"pass={passed}") + print(f"pass={passed}", file=f) if passed: print("Regression tests passed") @@ -72,8 +75,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05): message = "\n----The following regression tests failed:----\n" + \ "\n".join(failed_tests) + "\n" if "GITHUB_OUTPUT" in os.environ: + out_message = message.replace("\n", "%0A") with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"message={message}") + print(f'message={out_message}', file=f) print(message) raise Exception("Some regression tests failed") diff --git a/.github/automation/performance/inputs/conv b/.github/automation/performance/inputs/conv index f60f7654074..83c08d04446 100644 --- a/.github/automation/performance/inputs/conv +++ b/.github/automation/performance/inputs/conv @@ -19,4 +19,4 @@ --reset --dir=FWD_D --dt=f32 -mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0 \ No newline at end of file +mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0 diff --git a/.github/automation/performance/inputs/matmul b/.github/automation/performance/inputs/matmul index 5b3b2bf872c..ec3f485a925 100644 --- a/.github/automation/performance/inputs/matmul +++ b/.github/automation/performance/inputs/matmul @@ -16,4 +16,4 @@ # ******************************************************************************* --reset --dt=bf16,f32 -384x1x384:384x384 \ No newline at end of file +1500x384:384x384 diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml index 1328c7dd970..be474eaa7ce 100644 --- a/.github/workflows/ci-aarch64.yml +++ b/.github/workflows/ci-aarch64.yml @@ -20,6 +20,18 @@ name: "CI AArch64" #* To avoid duplicate jobs running when both push and PR is satisfied, we use this: #* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753 on: + push: + branches: [main, "rls-*"] + paths: + - ".github/**" + - "cmake/**" + - "examples/**" + - "include/**" + - "src/common/**" + - "src/cpu/*" + - "src/cpu/aarch64/**" + - "tests/**" + - "CMakeLists.txt" pull_request: types: [opened, synchronize, reopened] paths: @@ -36,8 +48,10 @@ on: workflow_dispatch: #* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844 +#* Does not apply to the main branch. concurrency: group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} # Declare default permissions as read only. permissions: read-all @@ -160,7 +174,7 @@ jobs: ## Performance test steps ## - name: Checkout oneDNN base - if: ${{ matrix.config.build == 'Release' }} + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }} uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.base_ref }} @@ -168,28 +182,29 @@ jobs: # TODO :: Create separate pipeline to cache oneDNN base - name: Configure oneDNN base - if: ${{ matrix.config.build == 'Release' }} - run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh working-directory: ${{ github.workspace }}/oneDNN_base env: ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary BUILD_TOOLSET: ${{ matrix.config.toolset }} CMAKE_BUILD_TYPE: ${{ matrix.config.build }} CMAKE_GENERATOR: Ninja - GCC_VERSION: 13 + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} ONEDNN_ACTION: configure ONEDNN_TEST_SET: ${{ matrix.config.testset }} ONEDNN_THREADING: ${{ matrix.config.threading }} - name: Build oneDNN base - if: ${{ matrix.config.build == 'Release' }} - run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh working-directory: ${{ github.workspace }}/oneDNN_base env: ONEDNN_ACTION: build - - shell: bash - if: ${{ matrix.config.build == 'Release' }} + - name: Run performance tests + shell: bash + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }} run: | OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt @@ -197,14 +212,14 @@ jobs: DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build - name: Compare performance test results - if: ${{ matrix.config.build == 'Release' }} + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }} id: performance-test continue-on-error: true run: python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt - name: Check performance test failure - if: ${{ matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }} - run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}" + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }} + run: echo "::warning file=.github/workflows/ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}" # This job adds a check named "CI AArch64" that represents overall # workflow status and can be used in branch rulesets