Skip to content

ci: Add initial regression test workflow #2356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/automation/performance/bench_performance.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#! /bin/bash

# *******************************************************************************
# Copyright 2025 Arm Limited and affiliates.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# *******************************************************************************

# Usage: bash bench_performance.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file}

# Directory containing this script; the benchdnn batch files live in
# "inputs/" next to it.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# All four positional arguments are required; bail out early instead of
# running an empty command or redirecting to an empty file name.
if [ "$#" -lt 4 ]; then
    echo "Usage: bash $0 {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file}" >&2
    exit 1
fi

BASELINE_BENCHDNN=$1
NEW_BENCHDNN=$2
BASELINE_RESULTS=$3
NEW_RESULTS=$4

# Each pass appends one "<problem>,<time>" line per problem to the results
# files, so five passes yield five samples per problem. Baseline and new
# executables are run back to back for every driver.
for i in {1..5}
do
    for driver in matmul conv
    do
        "${BASELINE_BENCHDNN}" "--${driver}" --mode=P '--perf-template=%prb%,%-time%' "--batch=${SCRIPT_DIR}/inputs/${driver}" >> "${BASELINE_RESULTS}"
        "${NEW_BENCHDNN}" "--${driver}" --mode=P '--perf-template=%prb%,%-time%' "--batch=${SCRIPT_DIR}/inputs/${driver}" >> "${NEW_RESULTS}"
    done
done
85 changes: 85 additions & 0 deletions .github/automation/performance/benchdnn_comparison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/python3

# *******************************************************************************
# Copyright 2025 Arm Limited and affiliates.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# *******************************************************************************

import sys
import os
from collections import defaultdict
from scipy.stats import ttest_ind


def _read_benchdnn_samples(path):
    """
    Collect timing samples per benchdnn problem from a results file.

    Only lines starting with "--mode=P" are considered; every such line is
    expected to look like "<problem>,<time>".

    Returns a tuple (samples, count): samples maps each problem string to the
    list of its times (as floats, in file order) and count is the number of
    result lines that were parsed.

    Raises ValueError if the time field of a result line is not a number.
    """
    samples = defaultdict(list)
    count = 0
    with open(path) as f:
        for line in f:
            # Trim non-formatted lines (benchdnn banners, summaries, ...)
            if not line.startswith("--mode=P"):
                continue
            # Split on the LAST comma so a comma inside the problem
            # descriptor cannot break parsing, and strip the line ending
            # explicitly so a final line without "\n" keeps its last digit.
            prb, _, time = line.strip().rpartition(",")
            samples[prb].append(float(time))
            count += 1
    return samples, count


def compare_two_benchdnn(file1, file2, tolerance=0.05):
    """
    Compare two benchdnn output files

    For every problem found in file1, a one-sided two-sample t-test checks
    whether the times in file2 are significantly greater than those in file1.
    A problem is reported as failed when the p-value is below `tolerance`.

    Arguments:
    file1     -- path to the baseline results
    file2     -- path to the new results
    tolerance -- significance level for the t-test (default 0.05)

    One "<problem> passed/failed" line is printed per problem. If the
    GITHUB_OUTPUT environment variable is set, "pass=<True|False>" and, on
    failure, "message=<summary>" are appended to the file it names.

    Raises Exception if either file has no result lines, if the number of
    result lines differs, if a problem from file1 is missing in file2, or if
    at least one problem failed.
    """
    r1_samples, r1_count = _read_benchdnn_samples(file1)
    r2_samples, r2_count = _read_benchdnn_samples(file2)

    if r1_count == 0 or r2_count == 0:
        raise Exception("One or both of the test results have zero lines")
    if r1_count != r2_count:
        raise Exception("The number of benchdnn runs do not match")

    failed_tests = []
    for prb, r1_times in r1_samples.items():
        if prb not in r2_samples:
            raise Exception(f"{prb} exists in {file1} but not {file2}")
        r2_times = r2_samples[prb]

        res = ttest_ind(r2_times, r1_times, alternative='greater')

        # Judge each problem on its own result; the overall verdict is
        # derived from failed_tests after the loop.
        regressed = res.pvalue < tolerance
        if regressed:
            failed_tests.append(prb)

        print(prb + (" failed" if regressed else " passed"))

    passed = not failed_tests

    if "GITHUB_OUTPUT" in os.environ:
        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
            print(f"pass={passed}", file=f)

    if passed:
        print("Regression tests passed")
        return

    message = "\n----The following regression tests failed:----\n" + \
        "\n".join(failed_tests) + "\n"
    if "GITHUB_OUTPUT" in os.environ:
        # Step outputs written this way are single-line, so newlines are
        # encoded as %0A.
        out_message = message.replace("\n", "%0A")
        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
            print(f'message={out_message}', file=f)
    print(message)
    raise Exception("Some regression tests failed")

if __name__ == "__main__":
    # Command line: benchdnn_comparison.py <baseline results> <new results>
    baseline_results = sys.argv[1]
    new_results = sys.argv[2]
    compare_two_benchdnn(baseline_results, new_results)
22 changes: 22 additions & 0 deletions .github/automation/performance/inputs/conv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# *******************************************************************************
# Copyright 2025 Arm Limited and affiliates.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# *******************************************************************************

# From ResNet
--reset
--dir=FWD_D
--dt=f32
mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0
19 changes: 19 additions & 0 deletions .github/automation/performance/inputs/matmul
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# *******************************************************************************
# Copyright 2025 Arm Limited and affiliates.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# *******************************************************************************
--reset
--dt=bf16,f32
1500x384:384x384
60 changes: 60 additions & 0 deletions .github/workflows/ci-aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ jobs:
content="${content//[$'\t\r\n$ ']}"
echo "output=$content" >> $GITHUB_OUTPUT

# Note: This will create a GitHub Actions cache
- name: Get latest CMake and Ninja
uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5
with:
Expand All @@ -111,6 +112,15 @@ jobs:
with:
version: ${{ fromJson(steps.get-versions.outputs.output).dependencies.clang }}

- name: setup python
uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Install scipy
if: ${{ matrix.config.build == 'Release' }}
run: pip install scipy

- name: Clone ACL
run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh
env:
Expand Down Expand Up @@ -161,6 +171,56 @@ jobs:
CTEST_PARALLEL_LEVEL: 6
DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
ONEDNN_THREADING: ${{ matrix.config.threading }}

## Performance test steps ##
- name: Checkout oneDNN base
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.base_ref }}
path: oneDNN_base

# TODO :: Create separate pipeline to cache oneDNN base
- name: Configure oneDNN base
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh
working-directory: ${{ github.workspace }}/oneDNN_base
env:
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.config.toolset }}
CMAKE_BUILD_TYPE: ${{ matrix.config.build }}
CMAKE_GENERATOR: Ninja
GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }}
ONEDNN_ACTION: configure
ONEDNN_TEST_SET: ${{ matrix.config.testset }}
ONEDNN_THREADING: ${{ matrix.config.threading }}

- name: Build oneDNN base
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh
working-directory: ${{ github.workspace }}/oneDNN_base
env:
ONEDNN_ACTION: build

- name: Run performance tests
shell: bash
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
run: |
OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
env:
DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build

- name: Compare performance test results
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
id: performance-test
continue-on-error: true
run: python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt

- name: Check performance test failure
if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }}
run: echo "::warning file=.github/workflows/ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}"

# This job adds a check named "CI AArch64" that represents overall
# workflow status and can be used in branch rulesets
status:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/nightly-aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ jobs:
with:
path: oneDNN

# Note: This will create a GitHub Actions cache
- name: Get latest CMake and Ninja
uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5
with:
Expand Down