From 128434db549aa3e40af58a3feccf4ad68cd0c8db Mon Sep 17 00:00:00 2001
From: Ryo Suzuki <ryo.suzuki@arm.com>
Date: Fri, 8 Nov 2024 14:16:06 +0000
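Subject: [PATCH 1/4] ci: Add initial regression test workflow

Add a benchdnn-based performance regression check to the CI AArch64
workflow. The new steps run for Release builds only:

- check out and build oneDNN main in oneDNN_main as the baseline;
- run tests/regression/bench_regression.sh (matmul and conv batches
  from tests/regression/inputs) on the baseline and on the build under
  test, with OMP_NUM_THREADS=4 and OMP_NUM_THREADS=16;
- compare the reported times with benchdnn_comparison.py, which flags
  a problem when the new time is 5% or more above the baseline time.

To guard against noisy runners, consistency_check.sh runs the baseline
twice and the two runs are compared in the same way. A regression
fails the job only if that consistency check passed.

Also add a setup-python step (Python 3.10) to the workflow.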

---
 .github/workflows/ci-aarch64.yml        | 78 +++++++++++++++++++++++++
 tests/regression/bench_regression.sh    | 24 ++++++++
 tests/regression/benchdnn_comparison.py | 75 ++++++++++++++++++++++++
 tests/regression/consistency_check.sh   | 26 +++++++++
 tests/regression/inputs/conv            | 22 +++++++
 tests/regression/inputs/matmul          | 19 ++++++
 6 files changed, 244 insertions(+)
 create mode 100644 tests/regression/bench_regression.sh
 create mode 100644 tests/regression/benchdnn_comparison.py
 create mode 100755 tests/regression/consistency_check.sh
 create mode 100644 tests/regression/inputs/conv
 create mode 100644 tests/regression/inputs/matmul

diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml
index 108ef6e9864..4df351353ea 100644
--- a/.github/workflows/ci-aarch64.yml
+++ b/.github/workflows/ci-aarch64.yml
@@ -111,6 +111,11 @@ jobs:
         with:
           version: ${{ fromJson(steps.get-versions.outputs.output).dependencies.clang }}
 
+      - name: setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
       - name: Clone ACL
         run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh
         env:
@@ -161,6 +166,79 @@ jobs:
           CTEST_PARALLEL_LEVEL: 6
           DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
           ONEDNN_THREADING: ${{ matrix.config.threading }}
+
+      ## Regression test steps ##
+      - name: Checkout oneDNN main
+        if: ${{ matrix.config.build == 'Release' }}
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          ref: main
+          path: oneDNN_main
+
+      # TODO :: Create separate pipeline to cache oneDNN main
+      - name: Configure oneDNN main
+        if: ${{ matrix.config.build == 'Release' }}
+        run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
+        working-directory: ${{ github.workspace }}/oneDNN_main
+        env:
+          ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
+          BUILD_TOOLSET: ${{ matrix.config.toolset }}
+          CMAKE_BUILD_TYPE: ${{ matrix.config.build }}
+          CMAKE_GENERATOR: Ninja
+          GCC_VERSION: 13
+          ONEDNN_ACTION: configure
+          ONEDNN_TEST_SET: ${{ matrix.config.testset }}
+          ONEDNN_THREADING: ${{ matrix.config.threading }}
+
+      - name: Build oneDNN main
+        if: ${{ matrix.config.build == 'Release' }}
+        run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
+        working-directory: ${{ github.workspace }}/oneDNN_main
+        env:
+          ONEDNN_ACTION: build
+
+      - shell: bash
+        if: ${{ matrix.config.build == 'Release' }}
+        run: |
+          bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_1.txt
+          bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_2.txt
+        env:
+          OMP_NUM_THREADS: 4
+          DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
+
+      - name: Compare consistency check results
+        if: ${{ matrix.config.build == 'Release' }}
+        id: consistency-check
+        continue-on-error: true
+        run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py consistency_1.txt consistency_2.txt
+
+      - shell: bash
+        if: ${{ matrix.config.build == 'Release' }}
+        run: |
+          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt
+          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt
+          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt
+          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt
+        env:
+          DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
+
+      - name: Compare regression test results
+        if: ${{ matrix.config.build == 'Release' }}
+        id: regression-test
+        continue-on-error: true
+        run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt
+
+      - name: Check consistency-check failure
+        if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass != 'True' && steps.regression-test.outputs.pass != 'True' }}
+        run: |
+          echo "::warnings title=consistency-check-failure::consistency check on main failed, ignoring regression test results!"
+      
+      - name: Check regression test failure
+        if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass == 'True' && steps.regression-test.outputs.pass != 'True' }}
+        run: |
+          echo "::error title=regression-test-failure::some regression tests failed. Check the compare regression test results step for more details!"
+          exit 1
+  
   # This job adds a check named "CI AArch64" that represents overall
   # workflow status and can be used in branch rulesets
   status:
diff --git a/tests/regression/bench_regression.sh b/tests/regression/bench_regression.sh
new file mode 100644
index 00000000000..e83290f87c5
--- /dev/null
+++ b/tests/regression/bench_regression.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# *******************************************************************************
+# Copyright 2025 Arm Limited and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# *******************************************************************************
+
+# Usage: bash bench_regression.sh {benchdnn_executable}
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+$1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul
+$1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv
\ No newline at end of file
diff --git a/tests/regression/benchdnn_comparison.py b/tests/regression/benchdnn_comparison.py
new file mode 100644
index 00000000000..ff7b6d75ed9
--- /dev/null
+++ b/tests/regression/benchdnn_comparison.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+
+# *******************************************************************************
+# Copyright 2025 Arm Limited and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# *******************************************************************************
+
+import sys
+import os
+
+
+def compare_two_benchdnn(file1, file2, tolerance=0.05):
+    """
+    Compare two benchdnn output files
+    """
+    with open(file1) as f:
+        r1 = f.readlines()
+
+    with open(file2) as f:
+        r2 = f.readlines()
+
+    # Trim non-formatted lines and split the prolem from time
+    r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"]
+    r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"]
+
+    # Convert to dict and trim \n
+    r1 = [(x[0], float(x[1][:-1])) for x in r1]
+    r2 = [(x[0], float(x[1][:-1])) for x in r2]
+
+    if len(r1) != len(r2):
+        raise Exception("The number of benchdnn runs do not match")
+
+    print("%prb%,%-time(old)%,%-time(new)%,%passed%")
+
+    passed = True
+    failed_tests = []
+    for idx, item in enumerate(r1):
+        prb, time1 = item
+        if prb != r2[idx][0]:
+            raise Exception(f"{prb} exists in {file1} but not {file2}")
+
+        res_str = f"{prb}, {time1}, {r2[idx][1]}"
+        print(res_str)
+
+        if time1 != 0: # Incompatible tests would return 0 so avoid division by 0
+            test_pass = (r2[idx][1] - time1) / time1 < tolerance
+            if not test_pass:
+                failed_tests.append(res_str)
+                passed = False
+
+    if "GITHUB_OUTPUT" in os.environ:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+            f.write(f"pass={passed}")
+
+    if passed:
+        print("Regression tests passed")
+    else:
+        print("\n----The following tests did not pass:----")
+        print("\n".join(failed_tests) + "\n")
+        raise Exception("Some regression tests did not pass")
+
+if __name__ == "__main__":
+    compare_two_benchdnn(sys.argv[1], sys.argv[2])
diff --git a/tests/regression/consistency_check.sh b/tests/regression/consistency_check.sh
new file mode 100755
index 00000000000..adf19ab952a
--- /dev/null
+++ b/tests/regression/consistency_check.sh
@@ -0,0 +1,26 @@
+#! /bin/bash
+
+# *******************************************************************************
+# Copyright 2025 Arm Limited and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# *******************************************************************************
+
+# Used for checking the fluctations in performance of a github actions runner
+# before performing the actual regression tests
+#
+# Usage: bash consistency_check.sh {benchdnn_executable}
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+$1 --matmul --mode=P --repeats-per-prb=20 --perf-template=%prb%,%-time% --dt=f32 128x300:300x128
diff --git a/tests/regression/inputs/conv b/tests/regression/inputs/conv
new file mode 100644
index 00000000000..f60f7654074
--- /dev/null
+++ b/tests/regression/inputs/conv
@@ -0,0 +1,22 @@
+# *******************************************************************************
+# Copyright 2025 Arm Limited and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# *******************************************************************************
+
+# From Resnet
+--reset
+--dir=FWD_D
+--dt=f32
+mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0
\ No newline at end of file
diff --git a/tests/regression/inputs/matmul b/tests/regression/inputs/matmul
new file mode 100644
index 00000000000..5b3b2bf872c
--- /dev/null
+++ b/tests/regression/inputs/matmul
@@ -0,0 +1,19 @@
+# *******************************************************************************
+# Copyright 2025 Arm Limited and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# *******************************************************************************
+--reset
+--dt=bf16,f32
+384x1x384:384x384
\ No newline at end of file

From a60a9b727f989825183a2c06039951ff39cfe39f Mon Sep 17 00:00:00 2001
From: Ryo Suzuki <ryo.suzuki@arm.com>
Date: Thu, 23 Jan 2025 17:06:08 +0000
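Subject: [PATCH 2/4] ci: use t-test for regression testing

Replace the fixed 5% threshold with a statistical comparison.

bench_regression.sh now takes the baseline and new benchdnn
executables plus the two result files, and runs each batch five times,
alternating between the two executables. benchdnn_comparison.py groups
the times per problem and runs a one-sided two-sample t-test
(scipy.stats.ttest_ind, alternative='greater'); a problem is reported
when p < 0.05.

Drop the consistency check and its script, install scipy for Release
builds, and replace the failing "exit 1" step with a warning
annotation that carries the script's "message" output.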

---
 .github/workflows/ci-aarch64.yml        | 38 ++++++----------------
 tests/regression/bench_regression.sh    | 12 +++++--
 tests/regression/benchdnn_comparison.py | 42 ++++++++++++++-----------
 tests/regression/consistency_check.sh   | 26 ---------------
 4 files changed, 42 insertions(+), 76 deletions(-)
 mode change 100644 => 100755 tests/regression/bench_regression.sh
 delete mode 100755 tests/regression/consistency_check.sh

diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml
index 4df351353ea..49c56eff5eb 100644
--- a/.github/workflows/ci-aarch64.yml
+++ b/.github/workflows/ci-aarch64.yml
@@ -116,6 +116,10 @@ jobs:
         with:
           python-version: '3.10'
 
+      - name: Install scipy
+        if: ${{ matrix.config.build == 'Release' }}
+        run: pip install scipy
+
       - name: Clone ACL
         run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh
         env:
@@ -200,25 +204,8 @@ jobs:
       - shell: bash
         if: ${{ matrix.config.build == 'Release' }}
         run: |
-          bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_1.txt
-          bash ${{ github.workspace }}/oneDNN/tests/regression/consistency_check.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn > consistency_2.txt
-        env:
-          OMP_NUM_THREADS: 4
-          DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
-
-      - name: Compare consistency check results
-        if: ${{ matrix.config.build == 'Release' }}
-        id: consistency-check
-        continue-on-error: true
-        run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py consistency_1.txt consistency_2.txt
-
-      - shell: bash
-        if: ${{ matrix.config.build == 'Release' }}
-        run: |
-          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt
-          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt
-          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn >> main.txt
-          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn >> new.txt
+          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt
+          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt
         env:
           DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
 
@@ -228,17 +215,10 @@ jobs:
         continue-on-error: true
         run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt
 
-      - name: Check consistency-check failure
-        if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass != 'True' && steps.regression-test.outputs.pass != 'True' }}
-        run: |
-          echo "::warnings title=consistency-check-failure::consistency check on main failed, ignoring regression test results!"
-      
       - name: Check regression test failure
-        if: ${{ matrix.config.build == 'Release' && steps.consistency-check.outputs.pass == 'True' && steps.regression-test.outputs.pass != 'True' }}
-        run: |
-          echo "::error title=regression-test-failure::some regression tests failed. Check the compare regression test results step for more details!"
-          exit 1
-  
+        if: ${{ matrix.config.build == 'Release' && steps.regression-test.outputs.pass == 'True' }}
+        run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.regression-test.outputs.message }}"
+
   # This job adds a check named "CI AArch64" that represents overall
   # workflow status and can be used in branch rulesets
   status:
diff --git a/tests/regression/bench_regression.sh b/tests/regression/bench_regression.sh
old mode 100644
new mode 100755
index e83290f87c5..b826f9bc270
--- a/tests/regression/bench_regression.sh
+++ b/tests/regression/bench_regression.sh
@@ -17,8 +17,14 @@
 # limitations under the License.
 # *******************************************************************************
 
-# Usage: bash bench_regression.sh {benchdnn_executable}
+# Usage: bash bench_regression.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file}
 
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-$1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul
-$1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv
\ No newline at end of file
+
+for i in {1..5}
+do
+    $1 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $3
+    $2 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $4
+    $1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $3
+    $2 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $4
+done
\ No newline at end of file
diff --git a/tests/regression/benchdnn_comparison.py b/tests/regression/benchdnn_comparison.py
index ff7b6d75ed9..8bb918109ab 100644
--- a/tests/regression/benchdnn_comparison.py
+++ b/tests/regression/benchdnn_comparison.py
@@ -19,6 +19,8 @@
 
 import sys
 import os
+from collections import defaultdict
+from scipy.stats import ttest_ind
 
 
 def compare_two_benchdnn(file1, file2, tolerance=0.05):
@@ -35,30 +37,30 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
     r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"]
     r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"]
 
-    # Convert to dict and trim \n
-    r1 = [(x[0], float(x[1][:-1])) for x in r1]
-    r2 = [(x[0], float(x[1][:-1])) for x in r2]
-
     if len(r1) != len(r2):
         raise Exception("The number of benchdnn runs do not match")
 
-    print("%prb%,%-time(old)%,%-time(new)%,%passed%")
+    # Convert to dict and trim \n
+    r1_samples = defaultdict(list)
+    r2_samples = defaultdict(list)
+
+    for k, v in r1:
+        r1_samples[k].append(float(v[:-1]))
+    for k, v in r2:
+        r2_samples[k].append(float(v[:-1]))
 
     passed = True
     failed_tests = []
-    for idx, item in enumerate(r1):
-        prb, time1 = item
-        if prb != r2[idx][0]:
+    for prb, r1_times in r1_samples.items():
+        if prb not in r2_samples:
             raise Exception(f"{prb} exists in {file1} but not {file2}")
+        r2_times = r2_samples[prb]
 
-        res_str = f"{prb}, {time1}, {r2[idx][1]}"
-        print(res_str)
+        res = ttest_ind(r2_times, r1_times, alternative='greater')
 
-        if time1 != 0: # Incompatible tests would return 0 so avoid division by 0
-            test_pass = (r2[idx][1] - time1) / time1 < tolerance
-            if not test_pass:
-                failed_tests.append(res_str)
-                passed = False
+        if res.pvalue < 0.05:
+            failed_tests.append(prb)
+            passed = False
 
     if "GITHUB_OUTPUT" in os.environ:
         with open(os.environ["GITHUB_OUTPUT"], "a") as f:
@@ -67,9 +69,13 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
     if passed:
         print("Regression tests passed")
     else:
-        print("\n----The following tests did not pass:----")
-        print("\n".join(failed_tests) + "\n")
-        raise Exception("Some regression tests did not pass")
+        message = "\n----The following regression tests failed:----\n" + \
+                    "\n".join(failed_tests) + "\n"
+        if "GITHUB_OUTPUT" in os.environ:
+            with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+                f.write(f"message={message}")
+        print(message)
+        raise Exception("Some regression tests failed")
 
 if __name__ == "__main__":
     compare_two_benchdnn(sys.argv[1], sys.argv[2])
diff --git a/tests/regression/consistency_check.sh b/tests/regression/consistency_check.sh
deleted file mode 100755
index adf19ab952a..00000000000
--- a/tests/regression/consistency_check.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#! /bin/bash
-
-# *******************************************************************************
-# Copyright 2025 Arm Limited and affiliates.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# *******************************************************************************
-
-# Used for checking the fluctations in performance of a github actions runner
-# before performing the actual regression tests
-#
-# Usage: bash consistency_check.sh {benchdnn_executable}
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-$1 --matmul --mode=P --repeats-per-prb=20 --perf-template=%prb%,%-time% --dt=f32 128x300:300x128

From 3a74b4e56dcc23f912436ef190c8679a6b456316 Mon Sep 17 00:00:00 2001
From: Ryo Suzuki <ryo.suzuki@arm.com>
Date: Mon, 27 Jan 2025 16:44:19 +0000
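Subject: [PATCH 3/4] ci: refactor regression tests

Move the scripts and inputs from tests/regression to
.github/automation/performance and rename bench_regression.sh to
bench_performance.sh; step names, ids and result files follow the new
"performance"/"base" naming.

In ci-aarch64.yml:
- use the pull request base branch (github.base_ref) as the baseline
  instead of main, checked out to oneDNN_base;
- remove the push trigger and the cancel-in-progress setting;
- emit the warning annotation when the comparison did not pass
  (pass != 'True') rather than when it did.

Note in both the CI and nightly workflows that the get-cmake step
creates a GitHub Actions cache.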

---
 .../performance/bench_performance.sh          |  2 +-
 .../performance}/benchdnn_comparison.py       |  0
 .../automation/performance}/inputs/conv       |  0
 .../automation/performance}/inputs/matmul     |  0
 .github/workflows/ci-aarch64.yml              | 49 +++++++------------
 .github/workflows/nightly-aarch64.yml         |  1 +
 6 files changed, 20 insertions(+), 32 deletions(-)
 rename tests/regression/bench_regression.sh => .github/automation/performance/bench_performance.sh (90%)
 rename {tests/regression => .github/automation/performance}/benchdnn_comparison.py (100%)
 rename {tests/regression => .github/automation/performance}/inputs/conv (100%)
 rename {tests/regression => .github/automation/performance}/inputs/matmul (100%)

diff --git a/tests/regression/bench_regression.sh b/.github/automation/performance/bench_performance.sh
similarity index 90%
rename from tests/regression/bench_regression.sh
rename to .github/automation/performance/bench_performance.sh
index b826f9bc270..f0299b1b76b 100755
--- a/tests/regression/bench_regression.sh
+++ b/.github/automation/performance/bench_performance.sh
@@ -17,7 +17,7 @@
 # limitations under the License.
 # *******************************************************************************
 
-# Usage: bash bench_regression.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file}
+# Usage: bash bench_performance.sh {baseline_benchdnn_executable} {benchdnn_executable} {baseline_results_file} {new_results_file}
 
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
diff --git a/tests/regression/benchdnn_comparison.py b/.github/automation/performance/benchdnn_comparison.py
similarity index 100%
rename from tests/regression/benchdnn_comparison.py
rename to .github/automation/performance/benchdnn_comparison.py
diff --git a/tests/regression/inputs/conv b/.github/automation/performance/inputs/conv
similarity index 100%
rename from tests/regression/inputs/conv
rename to .github/automation/performance/inputs/conv
diff --git a/tests/regression/inputs/matmul b/.github/automation/performance/inputs/matmul
similarity index 100%
rename from tests/regression/inputs/matmul
rename to .github/automation/performance/inputs/matmul
diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml
index 49c56eff5eb..1328c7dd970 100644
--- a/.github/workflows/ci-aarch64.yml
+++ b/.github/workflows/ci-aarch64.yml
@@ -20,18 +20,6 @@ name: "CI AArch64"
 #* To avoid duplicate jobs running when both push and PR is satisfied, we use this:
 #* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753
 on:
-  push:
-    branches: [main, "rls-*"]
-    paths:
-      - ".github/**"
-      - "cmake/**"
-      - "examples/**"
-      - "include/**"
-      - "src/common/**"
-      - "src/cpu/*"
-      - "src/cpu/aarch64/**"
-      - "tests/**"
-      - "CMakeLists.txt"
   pull_request:
     types: [opened, synchronize, reopened]
     paths:
@@ -48,10 +36,8 @@ on:
   workflow_dispatch:
 
 #* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844
-#* Does not apply to the main branch.
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -87,6 +73,7 @@ jobs:
           content="${content//[$'\t\r\n$ ']}"
           echo "output=$content" >> $GITHUB_OUTPUT
 
+      # Note: This will create a github actions cache
       - name: Get latest CMake and Ninja
         uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5
         with:
@@ -171,19 +158,19 @@ jobs:
           DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
           ONEDNN_THREADING: ${{ matrix.config.threading }}
 
-      ## Regression test steps ##
-      - name: Checkout oneDNN main
+      ## Performance test steps ##
+      - name: Checkout oneDNN base
         if: ${{ matrix.config.build == 'Release' }}
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
-          ref: main
-          path: oneDNN_main
+          ref: ${{ github.base_ref }}
+          path: oneDNN_base
 
-      # TODO :: Create separate pipeline to cache oneDNN main
-      - name: Configure oneDNN main
+      # TODO :: Create separate pipeline to cache oneDNN base
+      - name: Configure oneDNN base
         if: ${{ matrix.config.build == 'Release' }}
         run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
-        working-directory: ${{ github.workspace }}/oneDNN_main
+        working-directory: ${{ github.workspace }}/oneDNN_base
         env:
           ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
           BUILD_TOOLSET: ${{ matrix.config.toolset }}
@@ -194,30 +181,30 @@ jobs:
           ONEDNN_TEST_SET: ${{ matrix.config.testset }}
           ONEDNN_THREADING: ${{ matrix.config.threading }}
 
-      - name: Build oneDNN main
+      - name: Build oneDNN base
         if: ${{ matrix.config.build == 'Release' }}
         run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
-        working-directory: ${{ github.workspace }}/oneDNN_main
+        working-directory: ${{ github.workspace }}/oneDNN_base
         env:
           ONEDNN_ACTION: build
 
       - shell: bash
         if: ${{ matrix.config.build == 'Release' }}
         run: |
-          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt
-          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/tests/regression/bench_regression.sh ${{ github.workspace }}/oneDNN_main/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn main.txt new.txt
+          OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
+          OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
         env:
           DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
 
-      - name: Compare regression test results
+      - name: Compare performance test results
         if: ${{ matrix.config.build == 'Release' }}
-        id: regression-test
+        id: performance-test
         continue-on-error: true
-        run: python ${{ github.workspace }}/oneDNN/tests/regression/benchdnn_comparison.py main.txt new.txt
+        run: python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt
 
-      - name: Check regression test failure
-        if: ${{ matrix.config.build == 'Release' && steps.regression-test.outputs.pass == 'True' }}
-        run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.regression-test.outputs.message }}"
+      - name: Check performance test failure
+        if: ${{ matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }}
+        run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}"
 
   # This job adds a check named "CI AArch64" that represents overall
   # workflow status and can be used in branch rulesets
diff --git a/.github/workflows/nightly-aarch64.yml b/.github/workflows/nightly-aarch64.yml
index 14e03718500..f026ab36e34 100644
--- a/.github/workflows/nightly-aarch64.yml
+++ b/.github/workflows/nightly-aarch64.yml
@@ -56,6 +56,7 @@ jobs:
         with:
           path: oneDNN
 
+      # Note: This will create a GitHub Actions cache
       - name: Get latest CMake and Ninja
         uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17 # v3.31.5
         with:

From 79ef6f01921f48d377319eb3e74844d713f237a7 Mon Sep 17 00:00:00 2001
From: Ryo Suzuki <ryo.suzuki@arm.com>
Date: Wed, 5 Feb 2025 11:17:27 +0000
Subject: [PATCH 4/4] ci: initial regression test

---
 .../performance/bench_performance.sh          |  2 +-
 .../performance/benchdnn_comparison.py        | 12 ++++--
 .github/automation/performance/inputs/conv    |  2 +-
 .github/automation/performance/inputs/matmul  |  2 +-
 .github/workflows/ci-aarch64.yml              | 37 +++++++++++++------
 5 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/.github/automation/performance/bench_performance.sh b/.github/automation/performance/bench_performance.sh
index f0299b1b76b..ff88cd455f0 100755
--- a/.github/automation/performance/bench_performance.sh
+++ b/.github/automation/performance/bench_performance.sh
@@ -27,4 +27,4 @@ do
     $2 --matmul --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/matmul >> $4
     $1 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $3
     $2 --conv --mode=P --perf-template=%prb%,%-time% --batch=${SCRIPT_DIR}/inputs/conv >> $4
-done
\ No newline at end of file
+done
diff --git a/.github/automation/performance/benchdnn_comparison.py b/.github/automation/performance/benchdnn_comparison.py
index 8bb918109ab..71326076dac 100644
--- a/.github/automation/performance/benchdnn_comparison.py
+++ b/.github/automation/performance/benchdnn_comparison.py
@@ -33,14 +33,15 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
     with open(file2) as f:
         r2 = f.readlines()
 
-    # Trim non-formatted lines and split the prolem from time
+    # Trim non-formatted lines and split the problem from time
     r1 = [x.split(",") for x in r1 if x[0:8] == "--mode=P"]
     r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"]
 
+    if (len(r1) == 0) or (len(r2) == 0):
+        raise Exception("One or both of the test results have zero lines")
     if len(r1) != len(r2):
         raise Exception("The number of benchdnn runs do not match")
 
-    # Convert to dict and trim \n
     r1_samples = defaultdict(list)
     r2_samples = defaultdict(list)
 
@@ -62,9 +63,11 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
             failed_tests.append(prb)
             passed = False
 
+        print(prb + (" failed" if prb in failed_tests else " passed"))
+
     if "GITHUB_OUTPUT" in os.environ:
         with open(os.environ["GITHUB_OUTPUT"], "a") as f:
-            f.write(f"pass={passed}")
+            print(f"pass={passed}", file=f)
 
     if passed:
         print("Regression tests passed")
@@ -72,8 +75,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
         message = "\n----The following regression tests failed:----\n" + \
                     "\n".join(failed_tests) + "\n"
         if "GITHUB_OUTPUT" in os.environ:
+            out_message = message.replace("\n", "%0A")
             with open(os.environ["GITHUB_OUTPUT"], "a") as f:
-                f.write(f"message={message}")
+                print(f'message={out_message}', file=f)
         print(message)
         raise Exception("Some regression tests failed")
 
diff --git a/.github/automation/performance/inputs/conv b/.github/automation/performance/inputs/conv
index f60f7654074..83c08d04446 100644
--- a/.github/automation/performance/inputs/conv
+++ b/.github/automation/performance/inputs/conv
@@ -19,4 +19,4 @@
 --reset
 --dir=FWD_D
 --dt=f32
-mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0
\ No newline at end of file
+mb1_ic64oc256_ih200oh200kh1sh1dh0ph0_iw267ow267kw1sw1dw0pw0
diff --git a/.github/automation/performance/inputs/matmul b/.github/automation/performance/inputs/matmul
index 5b3b2bf872c..ec3f485a925 100644
--- a/.github/automation/performance/inputs/matmul
+++ b/.github/automation/performance/inputs/matmul
@@ -16,4 +16,4 @@
 # *******************************************************************************
 --reset
 --dt=bf16,f32
-384x1x384:384x384
\ No newline at end of file
+1500x384:384x384
diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml
index 1328c7dd970..be474eaa7ce 100644
--- a/.github/workflows/ci-aarch64.yml
+++ b/.github/workflows/ci-aarch64.yml
@@ -20,6 +20,18 @@ name: "CI AArch64"
 #* To avoid duplicate jobs running when both push and PR is satisfied, we use this:
 #* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753
 on:
+  push:
+    branches: [main, "rls-*"]
+    paths:
+      - ".github/**"
+      - "cmake/**"
+      - "examples/**"
+      - "include/**"
+      - "src/common/**"
+      - "src/cpu/*"
+      - "src/cpu/aarch64/**"
+      - "tests/**"
+      - "CMakeLists.txt"
   pull_request:
     types: [opened, synchronize, reopened]
     paths:
@@ -36,8 +48,10 @@ on:
   workflow_dispatch:
 
 #* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844
+#* Does not apply to the main branch.
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -160,7 +174,7 @@ jobs:
 
       ## Performance test steps ##
       - name: Checkout oneDNN base
-        if: ${{ matrix.config.build == 'Release' }}
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           ref: ${{ github.base_ref }}
@@ -168,28 +182,29 @@ jobs:
 
       # TODO :: Create separate pipeline to cache oneDNN base
       - name: Configure oneDNN base
-        if: ${{ matrix.config.build == 'Release' }}
-        run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
+        run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh
         working-directory: ${{ github.workspace }}/oneDNN_base
         env:
           ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
           BUILD_TOOLSET: ${{ matrix.config.toolset }}
           CMAKE_BUILD_TYPE: ${{ matrix.config.build }}
           CMAKE_GENERATOR: Ninja
-          GCC_VERSION: 13
+          GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }}
           ONEDNN_ACTION: configure
           ONEDNN_TEST_SET: ${{ matrix.config.testset }}
           ONEDNN_THREADING: ${{ matrix.config.threading }}
 
       - name: Build oneDNN base
-        if: ${{ matrix.config.build == 'Release' }}
-        run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
+        run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh
         working-directory: ${{ github.workspace }}/oneDNN_base
         env:
           ONEDNN_ACTION: build
 
-      - shell: bash
-        if: ${{ matrix.config.build == 'Release' }}
+      - name: Run performance tests
+        shell: bash
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
         run: |
           OMP_NUM_THREADS=4 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
           OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base.txt new.txt
@@ -197,14 +212,18 @@ jobs:
           DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
 
       - name: Compare performance test results
-        if: ${{ matrix.config.build == 'Release' }}
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' }}
         id: performance-test
         continue-on-error: true
         run: python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt
 
       - name: Check performance test failure
-        if: ${{ matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }}
-        run: echo "::warning file=ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}"
+        if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && steps.performance-test.outputs.pass != 'True' }}
+        # Pass the message through the environment so quotes or shell
+        # metacharacters in a benchmark name cannot break or alter the command.
+        env:
+          MESSAGE: ${{ steps.performance-test.outputs.message }}
+        run: echo "::warning file=.github/workflows/ci-aarch64.yml,line=1,col=1::${MESSAGE}"
 
   # This job adds a check named "CI AArch64" that represents overall
   # workflow status and can be used in branch rulesets